refactor(memory): bridge host sdk duplicates

This commit is contained in:
Peter Steinberger
2026-04-29 13:13:07 +01:00
parent f52958ad67
commit 1dd37f5c90
20 changed files with 110 additions and 3848 deletions

View File

@@ -1,15 +1,38 @@
export {
CHARS_PER_TOKEN_ESTIMATE,
DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES,
DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS,
applyWindowsSpawnProgramPolicy,
configureSqliteWalMaintenance,
createSubsystemLogger,
detectMime,
estimateStringChars,
installProcessWarningFilter,
materializeWindowsSpawnProgram,
redactSensitiveText,
resolveGlobalSingleton,
resolveUserPath,
resolveWindowsExecutablePath,
resolveWindowsSpawnProgram,
resolveWindowsSpawnProgramCandidate,
runTasksWithConcurrency,
shortenHomeInString,
shortenHomePath,
shouldIgnoreWarning,
splitShellArgs,
truncateUtf16Safe,
writeFileWithinRoot,
} from "./openclaw-runtime.js";
export type {
ProcessWarning,
ResolveWindowsSpawnProgramCandidateParams,
ResolveWindowsSpawnProgramParams,
SqliteWalMaintenance,
SqliteWalMaintenanceOptions,
WindowsSpawnCandidateResolution,
WindowsSpawnInvocation,
WindowsSpawnProgram,
WindowsSpawnProgramCandidate,
WindowsSpawnResolution,
} from "./openclaw-runtime.js";

View File

@@ -78,6 +78,20 @@ export { writeFileWithinRoot } from "../../../../src/infra/fs-safe.js";
export { fetchWithSsrFGuard } from "../../../../src/infra/net/fetch-guard.js";
export { shouldUseEnvHttpProxyForUrl } from "../../../../src/infra/net/proxy-env.js";
export { ssrfPolicyFromHttpBaseUrlAllowedHostname } from "../../../../src/infra/net/ssrf.js";
export {
DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES,
DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS,
configureSqliteWalMaintenance,
} from "../../../../src/infra/sqlite-wal.js";
export type {
SqliteWalMaintenance,
SqliteWalMaintenanceOptions,
} from "../../../../src/infra/sqlite-wal.js";
export {
installProcessWarningFilter,
shouldIgnoreWarning,
} from "../../../../src/infra/warning-filter.js";
export type { ProcessWarning } from "../../../../src/infra/warning-filter.js";
export { redactSensitiveText } from "../../../../src/logging/redact.js";
export { createSubsystemLogger } from "../../../../src/logging/subsystem.js";
export { detectMime } from "../../../../src/media/mime.js";
@@ -136,4 +150,20 @@ export {
shortenHomePath,
truncateUtf16Safe,
} from "../../../../src/utils.js";
export {
applyWindowsSpawnProgramPolicy,
materializeWindowsSpawnProgram,
resolveWindowsExecutablePath,
resolveWindowsSpawnProgram,
resolveWindowsSpawnProgramCandidate,
} from "../../../../src/plugin-sdk/windows-spawn.js";
export type {
ResolveWindowsSpawnProgramCandidateParams,
ResolveWindowsSpawnProgramParams,
WindowsSpawnCandidateResolution,
WindowsSpawnInvocation,
WindowsSpawnProgram,
WindowsSpawnProgramCandidate,
WindowsSpawnResolution,
} from "../../../../src/plugin-sdk/windows-spawn.js";
export { resolveGlobalSingleton } from "../../../../src/shared/global-singleton.js";

View File

@@ -1,74 +1,6 @@
import type { DatabaseSync } from "node:sqlite";
const DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES = 1000;
const DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS = 30 * 60 * 1000;
type IntervalHandle = ReturnType<typeof setInterval> & {
unref?: () => void;
};
type SqliteWalCheckpointMode = "PASSIVE" | "FULL" | "RESTART" | "TRUNCATE";
export type SqliteWalMaintenance = {
checkpoint: () => boolean;
close: () => boolean;
};
export type SqliteWalMaintenanceOptions = {
autoCheckpointPages?: number;
checkpointIntervalMs?: number;
checkpointMode?: SqliteWalCheckpointMode;
onCheckpointError?: (error: unknown) => void;
};
function normalizeNonNegativeInteger(value: number, label: string): number {
if (!Number.isInteger(value) || value < 0) {
throw new Error(`${label} must be a non-negative integer`);
}
return value;
}
export function configureSqliteWalMaintenance(
db: DatabaseSync,
options: SqliteWalMaintenanceOptions = {},
): SqliteWalMaintenance {
const autoCheckpointPages = normalizeNonNegativeInteger(
options.autoCheckpointPages ?? DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES,
"autoCheckpointPages",
);
const checkpointIntervalMs = normalizeNonNegativeInteger(
options.checkpointIntervalMs ?? DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS,
"checkpointIntervalMs",
);
const checkpointMode = options.checkpointMode ?? "TRUNCATE";
db.exec("PRAGMA journal_mode = WAL;");
db.exec(`PRAGMA wal_autocheckpoint = ${autoCheckpointPages};`);
const checkpoint = (): boolean => {
try {
db.exec(`PRAGMA wal_checkpoint(${checkpointMode});`);
return true;
} catch (error) {
options.onCheckpointError?.(error);
return false;
}
};
let timer: IntervalHandle | null = null;
if (checkpointIntervalMs > 0) {
timer = setInterval(checkpoint, checkpointIntervalMs) as IntervalHandle;
timer.unref?.();
}
return {
checkpoint,
close: () => {
if (timer) {
clearInterval(timer);
timer = null;
}
return checkpoint();
},
};
}
export {
DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES,
DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS,
configureSqliteWalMaintenance,
} from "./openclaw-runtime-io.js";
export type { SqliteWalMaintenance, SqliteWalMaintenanceOptions } from "./openclaw-runtime-io.js";

View File

@@ -1,105 +1,2 @@
const warningFilterKey = Symbol.for("openclaw.warning-filter");
export type ProcessWarning = {
code?: string;
name?: string;
message?: string;
};
type ProcessWarningInstallState = {
installed: boolean;
};
function resolveWarningFilterState(): ProcessWarningInstallState {
const globalStore = globalThis as Record<PropertyKey, unknown>;
if (Object.prototype.hasOwnProperty.call(globalStore, warningFilterKey)) {
return globalStore[warningFilterKey] as ProcessWarningInstallState;
}
const state = { installed: false };
globalStore[warningFilterKey] = state;
return state;
}
export function shouldIgnoreWarning(warning: ProcessWarning): boolean {
if (warning.code === "DEP0040" && warning.message?.includes("punycode")) {
return true;
}
if (warning.code === "DEP0060" && warning.message?.includes("util._extend")) {
return true;
}
if (
warning.name === "ExperimentalWarning" &&
warning.message?.includes("SQLite is an experimental feature")
) {
return true;
}
return false;
}
function normalizeWarningArgs(args: unknown[]): ProcessWarning {
const warningArg = args[0];
const secondArg = args[1];
const thirdArg = args[2];
let name: string | undefined;
let code: string | undefined;
let message: string | undefined;
if (warningArg instanceof Error) {
name = warningArg.name;
message = warningArg.message;
code = (warningArg as Error & { code?: string }).code;
} else if (typeof warningArg === "string") {
message = warningArg;
}
if (secondArg && typeof secondArg === "object" && !Array.isArray(secondArg)) {
const options = secondArg as { type?: unknown; code?: unknown };
if (typeof options.type === "string") {
name = options.type;
}
if (typeof options.code === "string") {
code = options.code;
}
} else {
if (typeof secondArg === "string") {
name = secondArg;
}
if (typeof thirdArg === "string") {
code = thirdArg;
}
}
return { name, code, message };
}
export function installProcessWarningFilter(): void {
const state = resolveWarningFilterState();
if (state.installed) {
return;
}
const originalEmitWarning = process.emitWarning.bind(process);
const wrappedEmitWarning: typeof process.emitWarning = ((...args: unknown[]) => {
if (shouldIgnoreWarning(normalizeWarningArgs(args))) {
return;
}
if (
args[0] instanceof Error &&
args[1] &&
typeof args[1] === "object" &&
!Array.isArray(args[1])
) {
const warning = args[0];
const emitted = Object.assign(new Error(warning.message), {
name: warning.name,
code: (warning as Error & { code?: string }).code,
});
process.emit("warning", emitted);
return;
}
Reflect.apply(originalEmitWarning, process, args);
}) as typeof process.emitWarning;
process.emitWarning = wrappedEmitWarning;
state.installed = true;
}
export { installProcessWarningFilter, shouldIgnoreWarning } from "./openclaw-runtime-io.js";
export type { ProcessWarning } from "./openclaw-runtime-io.js";

View File

@@ -1,285 +1,9 @@
import { readFileSync, statSync } from "node:fs";
import path from "node:path";
import { normalizeLowercaseStringOrEmpty, normalizeOptionalString } from "./string-utils.js";
type WindowsSpawnResolution = "direct" | "node-entrypoint" | "exe-entrypoint" | "shell-fallback";
type WindowsSpawnCandidateResolution = Exclude<WindowsSpawnResolution, "shell-fallback">;
type WindowsSpawnProgramCandidate = {
command: string;
leadingArgv: string[];
resolution: WindowsSpawnCandidateResolution | "unresolved-wrapper";
windowsHide?: boolean;
};
export type WindowsSpawnProgram = {
command: string;
leadingArgv: string[];
resolution: WindowsSpawnResolution;
shell?: boolean;
windowsHide?: boolean;
};
export type WindowsSpawnInvocation = {
command: string;
argv: string[];
resolution: WindowsSpawnResolution;
shell?: boolean;
windowsHide?: boolean;
};
export type ResolveWindowsSpawnProgramParams = {
command: string;
platform?: NodeJS.Platform;
env?: NodeJS.ProcessEnv;
execPath?: string;
packageName?: string;
allowShellFallback?: boolean;
};
function isFilePath(candidate: string): boolean {
try {
return statSync(candidate).isFile();
} catch {
return false;
}
}
function resolveWindowsExecutablePath(command: string, env: NodeJS.ProcessEnv): string {
if (command.includes("/") || command.includes("\\") || path.isAbsolute(command)) {
return command;
}
const pathValue = env.PATH ?? env.Path ?? process.env.PATH ?? process.env.Path ?? "";
const pathEntries = pathValue
.split(";")
.map((entry) => entry.trim())
.filter(Boolean);
const hasExtension = path.extname(command).length > 0;
const pathExtRaw =
env.PATHEXT ??
env.Pathext ??
process.env.PATHEXT ??
process.env.Pathext ??
".EXE;.CMD;.BAT;.COM";
const pathExt = hasExtension
? [""]
: pathExtRaw
.split(";")
.map((ext) => ext.trim())
.filter(Boolean)
.map((ext) => (ext.startsWith(".") ? ext : `.${ext}`));
for (const dir of pathEntries) {
for (const ext of pathExt) {
const normalizedExt = normalizeLowercaseStringOrEmpty(ext);
const uppercaseExt = ext.toUpperCase();
for (const candidateExt of [ext, normalizedExt, uppercaseExt]) {
const candidate = path.join(dir, `${command}${candidateExt}`);
if (isFilePath(candidate)) {
return candidate;
}
}
}
}
return command;
}
function resolveEntrypointFromCmdShim(wrapperPath: string): string | null {
if (!isFilePath(wrapperPath)) {
return null;
}
try {
const content = readFileSync(wrapperPath, "utf8");
const candidates: string[] = [];
for (const match of content.matchAll(/"([^"\r\n]*)"/g)) {
const token = match[1] ?? "";
const relMatch = token.match(/%~?dp0%?\s*[\\/]*(.*)$/i);
const relative = relMatch?.[1]?.trim();
if (!relative) {
continue;
}
const normalizedRelative = relative.replace(/[\\/]+/g, path.sep).replace(/^[\\/]+/, "");
const candidate = path.resolve(path.dirname(wrapperPath), normalizedRelative);
if (isFilePath(candidate)) {
candidates.push(candidate);
}
}
const nonNode = candidates.find((candidate) => {
const base = normalizeLowercaseStringOrEmpty(path.basename(candidate));
return base !== "node.exe" && base !== "node";
});
return nonNode ?? null;
} catch {
return null;
}
}
function resolveBinEntry(
packageName: string | undefined,
binField: string | Record<string, string> | undefined,
): string | null {
if (typeof binField === "string") {
return normalizeOptionalString(binField) ?? null;
}
if (!binField || typeof binField !== "object") {
return null;
}
if (packageName) {
const preferred = binField[packageName];
const normalizedPreferred =
typeof preferred === "string" ? normalizeOptionalString(preferred) : undefined;
if (normalizedPreferred) {
return normalizedPreferred;
}
}
for (const value of Object.values(binField)) {
const normalizedValue = typeof value === "string" ? normalizeOptionalString(value) : undefined;
if (normalizedValue) {
return normalizedValue;
}
}
return null;
}
function resolveEntrypointFromPackageJson(
wrapperPath: string,
packageName?: string,
): string | null {
if (!packageName) {
return null;
}
const wrapperDir = path.dirname(wrapperPath);
const packageDirs = [
path.resolve(wrapperDir, "..", packageName),
path.resolve(wrapperDir, "node_modules", packageName),
];
for (const packageDir of packageDirs) {
const packageJsonPath = path.join(packageDir, "package.json");
if (!isFilePath(packageJsonPath)) {
continue;
}
try {
const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf8")) as {
bin?: string | Record<string, string>;
};
const entryRel = resolveBinEntry(packageName, packageJson.bin);
if (!entryRel) {
continue;
}
const entryPath = path.resolve(packageDir, entryRel);
if (isFilePath(entryPath)) {
return entryPath;
}
} catch {
// Ignore malformed package metadata.
}
}
return null;
}
function resolveWindowsSpawnProgramCandidate(
params: ResolveWindowsSpawnProgramParams,
): WindowsSpawnProgramCandidate {
const platform = params.platform ?? process.platform;
const env = params.env ?? process.env;
const execPath = params.execPath ?? process.execPath;
if (platform !== "win32") {
return {
command: params.command,
leadingArgv: [],
resolution: "direct",
};
}
const resolvedCommand = resolveWindowsExecutablePath(params.command, env);
const ext = normalizeLowercaseStringOrEmpty(path.extname(resolvedCommand));
if (ext === ".js" || ext === ".cjs" || ext === ".mjs") {
return {
command: execPath,
leadingArgv: [resolvedCommand],
resolution: "node-entrypoint",
windowsHide: true,
};
}
if (ext === ".cmd" || ext === ".bat") {
const entrypoint =
resolveEntrypointFromCmdShim(resolvedCommand) ??
resolveEntrypointFromPackageJson(resolvedCommand, params.packageName);
if (entrypoint) {
const entryExt = normalizeLowercaseStringOrEmpty(path.extname(entrypoint));
if (entryExt === ".exe") {
return {
command: entrypoint,
leadingArgv: [],
resolution: "exe-entrypoint",
windowsHide: true,
};
}
return {
command: execPath,
leadingArgv: [entrypoint],
resolution: "node-entrypoint",
windowsHide: true,
};
}
return {
command: resolvedCommand,
leadingArgv: [],
resolution: "unresolved-wrapper",
};
}
return {
command: resolvedCommand,
leadingArgv: [],
resolution: "direct",
};
}
export function resolveWindowsSpawnProgram(
params: ResolveWindowsSpawnProgramParams,
): WindowsSpawnProgram {
const candidate = resolveWindowsSpawnProgramCandidate(params);
if (candidate.resolution !== "unresolved-wrapper") {
return {
command: candidate.command,
leadingArgv: candidate.leadingArgv,
resolution: candidate.resolution,
windowsHide: candidate.windowsHide,
};
}
if (params.allowShellFallback === true) {
return {
command: candidate.command,
leadingArgv: [],
resolution: "shell-fallback",
shell: true,
};
}
throw new Error(
`${path.basename(candidate.command)} wrapper resolved, but no executable/Node entrypoint could be resolved without shell execution.`,
);
}
export function materializeWindowsSpawnProgram(
program: WindowsSpawnProgram,
argv: string[],
): WindowsSpawnInvocation {
return {
command: program.command,
argv: [...program.leadingArgv, ...argv],
resolution: program.resolution,
shell: program.shell,
windowsHide: program.windowsHide,
};
}
export {
materializeWindowsSpawnProgram,
resolveWindowsSpawnProgram,
} from "./openclaw-runtime-io.js";
export type {
ResolveWindowsSpawnProgramParams,
WindowsSpawnInvocation,
WindowsSpawnProgram,
} from "./openclaw-runtime-io.js";

View File

@@ -1,460 +1 @@
import fs from "node:fs";
import path from "node:path";
import { resolveAgentWorkspaceDir } from "../../agents/agent-scope-config.js";
import { parseDurationMs } from "../../cli/parse-duration.js";
import type { SessionSendPolicyConfig } from "../../config/types.base.js";
import type {
MemoryBackend,
MemoryCitationsMode,
MemoryQmdConfig,
MemoryQmdIndexPath,
MemoryQmdMcporterConfig,
MemoryQmdSearchMode,
MemoryQmdStartupMode,
} from "../../config/types.memory.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { CANONICAL_ROOT_MEMORY_FILENAME } from "../../memory/root-memory-files.js";
import { normalizeAgentId } from "../../routing/session-key.js";
import {
normalizeLowercaseStringOrEmpty,
normalizeOptionalString,
} from "../../shared/string-coerce.js";
import { resolveUserPath } from "../../utils.js";
import { splitShellArgs } from "../../utils/shell-argv.js";
export type ResolvedMemoryBackendConfig = {
backend: MemoryBackend;
citations: MemoryCitationsMode;
qmd?: ResolvedQmdConfig;
};
export type ResolvedQmdCollection = {
name: string;
path: string;
pattern: string;
kind: "memory" | "custom" | "sessions";
};
export type ResolvedQmdUpdateConfig = {
intervalMs: number;
debounceMs: number;
onBoot: boolean;
startup: MemoryQmdStartupMode;
startupDelayMs: number;
waitForBootSync: boolean;
embedIntervalMs: number;
commandTimeoutMs: number;
updateTimeoutMs: number;
embedTimeoutMs: number;
};
export type ResolvedQmdLimitsConfig = {
maxResults: number;
maxSnippetChars: number;
maxInjectedChars: number;
timeoutMs: number;
};
export type ResolvedQmdSessionConfig = {
enabled: boolean;
exportDir?: string;
retentionDays?: number;
};
export type ResolvedQmdMcporterConfig = {
enabled: boolean;
serverName: string;
startDaemon: boolean;
};
export type ResolvedQmdConfig = {
command: string;
mcporter: ResolvedQmdMcporterConfig;
searchMode: MemoryQmdSearchMode;
searchTool?: string;
collections: ResolvedQmdCollection[];
sessions: ResolvedQmdSessionConfig;
update: ResolvedQmdUpdateConfig;
limits: ResolvedQmdLimitsConfig;
includeDefaultMemory: boolean;
scope?: SessionSendPolicyConfig;
};
const DEFAULT_BACKEND: MemoryBackend = "builtin";
const DEFAULT_CITATIONS: MemoryCitationsMode = "auto";
const DEFAULT_QMD_INTERVAL = "5m";
const DEFAULT_QMD_DEBOUNCE_MS = 15_000;
const DEFAULT_QMD_TIMEOUT_MS = 4_000;
// Defaulting to `query` can be extremely slow on CPU-only systems (query expansion + rerank).
// Prefer a faster mode for interactive use; users can opt into `query` for best recall.
const DEFAULT_QMD_SEARCH_MODE: MemoryQmdSearchMode = "search";
const DEFAULT_QMD_STARTUP: MemoryQmdStartupMode = "off";
const DEFAULT_QMD_STARTUP_DELAY_MS = 120_000;
const DEFAULT_QMD_EMBED_INTERVAL = "60m";
const DEFAULT_QMD_COMMAND_TIMEOUT_MS = 30_000;
const DEFAULT_QMD_UPDATE_TIMEOUT_MS = 120_000;
const DEFAULT_QMD_EMBED_TIMEOUT_MS = 120_000;
const DEFAULT_QMD_LIMITS: ResolvedQmdLimitsConfig = {
maxResults: 4,
maxSnippetChars: 450,
maxInjectedChars: 2_200,
timeoutMs: DEFAULT_QMD_TIMEOUT_MS,
};
const DEFAULT_QMD_MCPORTER: ResolvedQmdMcporterConfig = {
enabled: false,
serverName: "qmd",
startDaemon: true,
};
const DEFAULT_QMD_SCOPE: SessionSendPolicyConfig = {
default: "deny",
rules: [
{
action: "allow",
match: { chatType: "direct" },
},
{
action: "allow",
match: { chatType: "channel" },
},
],
};
function sanitizeName(input: string): string {
const lower = normalizeLowercaseStringOrEmpty(input).replace(/[^a-z0-9-]+/g, "-");
const trimmed = lower.replace(/^-+|-+$/g, "");
return trimmed || "collection";
}
function scopeCollectionBase(base: string, agentId: string): string {
return `${base}-${sanitizeName(agentId)}`;
}
function canonicalizePathForContainment(rawPath: string): string {
const resolved = path.resolve(rawPath);
let current = resolved;
const suffix: string[] = [];
while (true) {
try {
const canonical = path.normalize(fs.realpathSync.native(current));
return path.normalize(path.join(canonical, ...suffix));
} catch {
const parent = path.dirname(current);
if (parent === current) {
return path.normalize(resolved);
}
suffix.unshift(path.basename(current));
current = parent;
}
}
}
function isPathInsideRoot(candidatePath: string, rootPath: string): boolean {
const relative = path.relative(
canonicalizePathForContainment(rootPath),
canonicalizePathForContainment(candidatePath),
);
return relative === "" || (!relative.startsWith("..") && !path.isAbsolute(relative));
}
function ensureUniqueName(base: string, existing: Set<string>): string {
let name = sanitizeName(base);
if (!existing.has(name)) {
existing.add(name);
return name;
}
let suffix = 2;
while (existing.has(`${name}-${suffix}`)) {
suffix += 1;
}
const unique = `${name}-${suffix}`;
existing.add(unique);
return unique;
}
function resolvePath(raw: string, workspaceDir: string): string {
const trimmed = raw.trim();
if (!trimmed) {
throw new Error("path required");
}
if (trimmed.startsWith("~") || path.isAbsolute(trimmed)) {
return path.normalize(resolveUserPath(trimmed));
}
return path.normalize(path.resolve(workspaceDir, trimmed));
}
function resolveIntervalMs(raw: string | undefined): number {
const value = raw?.trim();
if (!value) {
return parseDurationMs(DEFAULT_QMD_INTERVAL, { defaultUnit: "m" });
}
try {
return parseDurationMs(value, { defaultUnit: "m" });
} catch {
return parseDurationMs(DEFAULT_QMD_INTERVAL, { defaultUnit: "m" });
}
}
function resolveEmbedIntervalMs(raw: string | undefined): number {
const value = raw?.trim();
if (!value) {
return parseDurationMs(DEFAULT_QMD_EMBED_INTERVAL, { defaultUnit: "m" });
}
try {
return parseDurationMs(value, { defaultUnit: "m" });
} catch {
return parseDurationMs(DEFAULT_QMD_EMBED_INTERVAL, { defaultUnit: "m" });
}
}
function resolveDebounceMs(raw: number | undefined): number {
if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
return Math.floor(raw);
}
return DEFAULT_QMD_DEBOUNCE_MS;
}
function resolveTimeoutMs(raw: number | undefined, fallback: number): number {
if (typeof raw === "number" && Number.isFinite(raw) && raw > 0) {
return Math.floor(raw);
}
return fallback;
}
function resolveStartupMode(raw: MemoryQmdConfig["update"]): MemoryQmdStartupMode {
const value = raw?.startup;
if (value === "idle" || value === "immediate" || value === "off") {
return value;
}
return DEFAULT_QMD_STARTUP;
}
function resolveStartupDelayMs(raw: number | undefined): number {
if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
return Math.floor(raw);
}
return DEFAULT_QMD_STARTUP_DELAY_MS;
}
function resolveLimits(raw?: MemoryQmdConfig["limits"]): ResolvedQmdLimitsConfig {
const parsed: ResolvedQmdLimitsConfig = { ...DEFAULT_QMD_LIMITS };
if (raw?.maxResults && raw.maxResults > 0) {
parsed.maxResults = Math.floor(raw.maxResults);
}
if (raw?.maxSnippetChars && raw.maxSnippetChars > 0) {
parsed.maxSnippetChars = Math.floor(raw.maxSnippetChars);
}
if (raw?.maxInjectedChars && raw.maxInjectedChars > 0) {
parsed.maxInjectedChars = Math.floor(raw.maxInjectedChars);
}
if (raw?.timeoutMs && raw.timeoutMs > 0) {
parsed.timeoutMs = Math.floor(raw.timeoutMs);
}
return parsed;
}
function resolveSearchMode(raw?: MemoryQmdConfig["searchMode"]): MemoryQmdSearchMode {
if (raw === "search" || raw === "vsearch" || raw === "query") {
return raw;
}
return DEFAULT_QMD_SEARCH_MODE;
}
function resolveSearchTool(raw?: MemoryQmdConfig["searchTool"]): string | undefined {
const value = raw?.trim();
return value ? value : undefined;
}
function resolveSessionConfig(
cfg: MemoryQmdConfig["sessions"],
workspaceDir: string,
): ResolvedQmdSessionConfig {
const enabled = Boolean(cfg?.enabled);
const exportDirRaw = cfg?.exportDir?.trim();
const exportDir = exportDirRaw ? resolvePath(exportDirRaw, workspaceDir) : undefined;
const retentionDays =
cfg?.retentionDays && cfg.retentionDays > 0 ? Math.floor(cfg.retentionDays) : undefined;
return {
enabled,
exportDir,
retentionDays,
};
}
function resolveCustomPaths(
rawPaths: MemoryQmdIndexPath[] | undefined,
workspaceDir: string,
existing: Set<string>,
agentId: string,
): ResolvedQmdCollection[] {
if (!rawPaths?.length) {
return [];
}
const collections: ResolvedQmdCollection[] = [];
const seenRoots = new Set<string>();
rawPaths.forEach((entry, index) => {
const trimmedPath = normalizeOptionalString(entry?.path);
if (!trimmedPath) {
return;
}
let resolved: string;
try {
resolved = resolvePath(trimmedPath, workspaceDir);
} catch {
return;
}
const pattern = normalizeOptionalString(entry.pattern) || "**/*.md";
const dedupeKey = `${resolved}\u0000${pattern}`;
if (seenRoots.has(dedupeKey)) {
return;
}
seenRoots.add(dedupeKey);
const explicitName = entry.name?.trim();
const baseName =
explicitName && !isPathInsideRoot(resolved, workspaceDir)
? explicitName
: scopeCollectionBase(explicitName || `custom-${index + 1}`, agentId);
const name = ensureUniqueName(baseName, existing);
collections.push({
name,
path: resolved,
pattern,
kind: "custom",
});
});
return collections;
}
function resolveMcporterConfig(raw?: MemoryQmdMcporterConfig): ResolvedQmdMcporterConfig {
const parsed: ResolvedQmdMcporterConfig = { ...DEFAULT_QMD_MCPORTER };
if (!raw) {
return parsed;
}
if (raw.enabled !== undefined) {
parsed.enabled = raw.enabled;
}
if (typeof raw.serverName === "string" && raw.serverName.trim()) {
parsed.serverName = raw.serverName.trim();
}
if (raw.startDaemon !== undefined) {
parsed.startDaemon = raw.startDaemon;
}
// When enabled, default startDaemon to true.
if (parsed.enabled && raw.startDaemon === undefined) {
parsed.startDaemon = true;
}
return parsed;
}
function resolveDefaultCollections(
include: boolean,
workspaceDir: string,
existing: Set<string>,
agentId: string,
): ResolvedQmdCollection[] {
if (!include) {
return [];
}
const entries: Array<{ path: string; pattern: string; base: string }> = [
{ path: workspaceDir, pattern: CANONICAL_ROOT_MEMORY_FILENAME, base: "memory-root" },
{ path: path.join(workspaceDir, "memory"), pattern: "**/*.md", base: "memory-dir" },
];
return entries.map((entry) => ({
name: ensureUniqueName(scopeCollectionBase(entry.base, agentId), existing),
path: entry.path,
pattern: entry.pattern,
kind: "memory",
}));
}
export function resolveMemoryBackendConfig(params: {
cfg: OpenClawConfig;
agentId: string;
}): ResolvedMemoryBackendConfig {
const normalizedAgentId = normalizeAgentId(params.agentId);
const backend = params.cfg.memory?.backend ?? DEFAULT_BACKEND;
const citations = params.cfg.memory?.citations ?? DEFAULT_CITATIONS;
if (backend !== "qmd") {
return { backend: "builtin", citations };
}
const workspaceDir = resolveAgentWorkspaceDir(params.cfg, normalizedAgentId);
const qmdCfg = params.cfg.memory?.qmd;
const includeDefaultMemory = qmdCfg?.includeDefaultMemory !== false;
const nameSet = new Set<string>();
const agentEntry = params.cfg.agents?.list?.find(
(entry) => normalizeAgentId(entry?.id) === normalizedAgentId,
);
const mergedExtraPaths = [
...(params.cfg.agents?.defaults?.memorySearch?.extraPaths ?? []),
...(agentEntry?.memorySearch?.extraPaths ?? []),
]
.filter((value): value is string => typeof value === "string")
.map((value) => value.trim())
.filter(Boolean);
const dedupedExtraPaths = Array.from(new Set(mergedExtraPaths));
const searchExtraPaths = dedupedExtraPaths.map(
(pathValue): { path: string; pattern?: string; name?: string } => ({ path: pathValue }),
);
const mergedExtraCollections = [
...(params.cfg.agents?.defaults?.memorySearch?.qmd?.extraCollections ?? []),
...(agentEntry?.memorySearch?.qmd?.extraCollections ?? []),
].filter(
(value): value is MemoryQmdIndexPath =>
value !== null && typeof value === "object" && typeof value.path === "string",
);
// Combine QMD-specific paths with extraPaths and per-agent cross-agent collections.
const allQmdPaths: MemoryQmdIndexPath[] = [
...(qmdCfg?.paths ?? []),
...searchExtraPaths,
...mergedExtraCollections,
];
const collections = [
...resolveDefaultCollections(includeDefaultMemory, workspaceDir, nameSet, normalizedAgentId),
...resolveCustomPaths(allQmdPaths, workspaceDir, nameSet, normalizedAgentId),
];
const rawCommand = normalizeOptionalString(qmdCfg?.command) || "qmd";
const parsedCommand = splitShellArgs(rawCommand);
const command = parsedCommand?.[0] || rawCommand.split(/\s+/)[0] || "qmd";
const resolved: ResolvedQmdConfig = {
command,
mcporter: resolveMcporterConfig(qmdCfg?.mcporter),
searchMode: resolveSearchMode(qmdCfg?.searchMode),
searchTool: resolveSearchTool(qmdCfg?.searchTool),
collections,
includeDefaultMemory,
sessions: resolveSessionConfig(qmdCfg?.sessions, workspaceDir),
update: {
intervalMs: resolveIntervalMs(qmdCfg?.update?.interval),
debounceMs: resolveDebounceMs(qmdCfg?.update?.debounceMs),
onBoot: qmdCfg?.update?.onBoot !== false,
startup: resolveStartupMode(qmdCfg?.update),
startupDelayMs: resolveStartupDelayMs(qmdCfg?.update?.startupDelayMs),
waitForBootSync: qmdCfg?.update?.waitForBootSync === true,
embedIntervalMs: resolveEmbedIntervalMs(qmdCfg?.update?.embedInterval),
commandTimeoutMs: resolveTimeoutMs(
qmdCfg?.update?.commandTimeoutMs,
DEFAULT_QMD_COMMAND_TIMEOUT_MS,
),
updateTimeoutMs: resolveTimeoutMs(
qmdCfg?.update?.updateTimeoutMs,
DEFAULT_QMD_UPDATE_TIMEOUT_MS,
),
embedTimeoutMs: resolveTimeoutMs(
qmdCfg?.update?.embedTimeoutMs,
DEFAULT_QMD_EMBED_TIMEOUT_MS,
),
},
limits: resolveLimits(qmdCfg?.limits),
scope: qmdCfg?.scope ?? DEFAULT_QMD_SCOPE,
};
return {
backend: "qmd",
citations,
qmd: resolved,
};
}
export * from "../../../packages/memory-host-sdk/src/host/backend-config.js";

View File

@@ -1,55 +1 @@
export type EmbeddingBatchOutputLine = {
custom_id?: string;
error?: { message?: string };
response?: {
status_code?: number;
body?:
| {
data?: Array<{
embedding?: number[];
}>;
error?: { message?: string };
}
| string;
};
};
export function applyEmbeddingBatchOutputLine(params: {
line: EmbeddingBatchOutputLine;
remaining: Set<string>;
errors: string[];
byCustomId: Map<string, number[]>;
}) {
const customId = params.line.custom_id;
if (!customId) {
return;
}
params.remaining.delete(customId);
const errorMessage = params.line.error?.message;
if (errorMessage) {
params.errors.push(`${customId}: ${errorMessage}`);
return;
}
const response = params.line.response;
const statusCode = response?.status_code ?? 0;
if (statusCode >= 400) {
const messageFromObject =
response?.body && typeof response.body === "object"
? (response.body as { error?: { message?: string } }).error?.message
: undefined;
const messageFromString = typeof response?.body === "string" ? response.body : undefined;
params.errors.push(`${customId}: ${messageFromObject ?? messageFromString ?? "unknown error"}`);
return;
}
const data =
response?.body && typeof response.body === "object" ? (response.body.data ?? []) : [];
const embedding = data[0]?.embedding ?? [];
if (embedding.length === 0) {
params.errors.push(`${customId}: empty embedding`);
return;
}
params.byCustomId.set(customId, embedding);
}
export * from "../../../packages/memory-host-sdk/src/host/batch-output.js";

View File

@@ -1,69 +1 @@
const TERMINAL_FAILURE_STATES = new Set(["failed", "expired", "cancelled", "canceled"]);
type BatchStatusLike = {
id?: string;
status?: string;
output_file_id?: string | null;
error_file_id?: string | null;
};
export type BatchCompletionResult = {
outputFileId: string;
errorFileId?: string;
};
export function resolveBatchCompletionFromStatus(params: {
provider: string;
batchId: string;
status: BatchStatusLike;
}): BatchCompletionResult {
if (!params.status.output_file_id) {
throw new Error(`${params.provider} batch ${params.batchId} completed without output file`);
}
return {
outputFileId: params.status.output_file_id,
errorFileId: params.status.error_file_id ?? undefined,
};
}
export async function throwIfBatchTerminalFailure(params: {
provider: string;
status: BatchStatusLike;
readError: (errorFileId: string) => Promise<string | undefined>;
}): Promise<void> {
const state = params.status.status ?? "unknown";
if (!TERMINAL_FAILURE_STATES.has(state)) {
return;
}
const detail = params.status.error_file_id
? await params.readError(params.status.error_file_id)
: undefined;
const suffix = detail ? `: ${detail}` : "";
throw new Error(`${params.provider} batch ${params.status.id ?? "<unknown>"} ${state}${suffix}`);
}
export async function resolveCompletedBatchResult(params: {
provider: string;
status: BatchStatusLike;
wait: boolean;
waitForBatch: () => Promise<BatchCompletionResult>;
}): Promise<BatchCompletionResult> {
const batchId = params.status.id ?? "<unknown>";
if (!params.wait && params.status.status !== "completed") {
throw new Error(
`${params.provider} batch ${batchId} submitted; enable remote.batch.wait to await completion`,
);
}
const completed =
params.status.status === "completed"
? resolveBatchCompletionFromStatus({
provider: params.provider,
batchId,
status: params.status,
})
: await params.waitForBatch();
if (!completed.outputFileId) {
throw new Error(`${params.provider} batch ${batchId} completed without output file`);
}
return completed;
}
export * from "../../../packages/memory-host-sdk/src/host/batch-status.js";

View File

@@ -1,85 +1 @@
import type { EmbeddingInput } from "./embedding-inputs.js";
// Helpers for enforcing embedding model input size limits.
//
// We use UTF-8 byte length as a conservative upper bound for tokenizer output.
// Tokenizers operate over bytes; a token must contain at least one byte, so
// token_count <= utf8_byte_length.
export function estimateUtf8Bytes(text: string): number {
if (!text) {
return 0;
}
return Buffer.byteLength(text, "utf8");
}
export function estimateStructuredEmbeddingInputBytes(input: EmbeddingInput): number {
if (!input.parts?.length) {
return estimateUtf8Bytes(input.text);
}
let total = 0;
for (const part of input.parts) {
if (part.type === "text") {
total += estimateUtf8Bytes(part.text);
continue;
}
total += estimateUtf8Bytes(part.mimeType);
total += estimateUtf8Bytes(part.data);
}
return total;
}
export function splitTextToUtf8ByteLimit(text: string, maxUtf8Bytes: number): string[] {
if (maxUtf8Bytes <= 0) {
return [text];
}
if (estimateUtf8Bytes(text) <= maxUtf8Bytes) {
return [text];
}
const parts: string[] = [];
let cursor = 0;
while (cursor < text.length) {
// The number of UTF-16 code units is always <= the number of UTF-8 bytes.
// This makes `cursor + maxUtf8Bytes` a safe upper bound on the next split point.
let low = cursor + 1;
let high = Math.min(text.length, cursor + maxUtf8Bytes);
let best = cursor;
while (low <= high) {
const mid = Math.floor((low + high) / 2);
const bytes = estimateUtf8Bytes(text.slice(cursor, mid));
if (bytes <= maxUtf8Bytes) {
best = mid;
low = mid + 1;
} else {
high = mid - 1;
}
}
if (best <= cursor) {
best = Math.min(text.length, cursor + 1);
}
// Avoid splitting inside a surrogate pair.
if (
best < text.length &&
best > cursor &&
text.charCodeAt(best - 1) >= 0xd800 &&
text.charCodeAt(best - 1) <= 0xdbff &&
text.charCodeAt(best) >= 0xdc00 &&
text.charCodeAt(best) <= 0xdfff
) {
best -= 1;
}
const part = text.slice(cursor, best);
if (!part) {
break;
}
parts.push(part);
cursor = best;
}
return parts;
}
export * from "../../../packages/memory-host-sdk/src/host/embedding-input-limits.js";

View File

@@ -1,65 +1 @@
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
import {
resolveRemoteEmbeddingBearerClient,
type RemoteEmbeddingProviderId,
} from "./embeddings-remote-client.js";
import { fetchRemoteEmbeddingVectors } from "./embeddings-remote-fetch.js";
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.types.js";
export type RemoteEmbeddingClient = {
baseUrl: string;
headers: Record<string, string>;
ssrfPolicy?: SsrFPolicy;
fetchImpl?: typeof fetch;
model: string;
};
export function createRemoteEmbeddingProvider(params: {
id: string;
client: RemoteEmbeddingClient;
errorPrefix: string;
maxInputTokens?: number;
}): EmbeddingProvider {
const { client } = params;
const url = `${client.baseUrl.replace(/\/$/, "")}/embeddings`;
const embed = async (input: string[]): Promise<number[][]> => {
if (input.length === 0) {
return [];
}
return await fetchRemoteEmbeddingVectors({
url,
headers: client.headers,
ssrfPolicy: client.ssrfPolicy,
fetchImpl: client.fetchImpl,
body: { model: client.model, input },
errorPrefix: params.errorPrefix,
});
};
return {
id: params.id,
model: client.model,
...(typeof params.maxInputTokens === "number" ? { maxInputTokens: params.maxInputTokens } : {}),
embedQuery: async (text) => {
const [vec] = await embed([text]);
return vec ?? [];
},
embedBatch: embed,
};
}
export async function resolveRemoteEmbeddingClient(params: {
provider: RemoteEmbeddingProviderId;
options: EmbeddingProviderOptions;
defaultBaseUrl: string;
normalizeModel: (model: string) => string;
}): Promise<RemoteEmbeddingClient> {
const { baseUrl, headers, ssrfPolicy } = await resolveRemoteEmbeddingBearerClient({
provider: params.provider,
options: params.options,
defaultBaseUrl: params.defaultBaseUrl,
});
const model = params.normalizeModel(params.options.model);
return { baseUrl, headers, ssrfPolicy, model };
}
export * from "../../../packages/memory-host-sdk/src/host/embeddings-remote-provider.js";

View File

@@ -1,525 +1 @@
import crypto from "node:crypto";
import fsSync from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
import { detectMime } from "../../media/mime.js";
import {
CANONICAL_ROOT_MEMORY_FILENAME,
resolveCanonicalRootMemoryFile,
shouldSkipRootMemoryAuxiliaryPath,
} from "../../memory/root-memory-files.js";
import { CHARS_PER_TOKEN_ESTIMATE, estimateStringChars } from "../../utils/cjk-chars.js";
import { runTasksWithConcurrency } from "../../utils/run-with-concurrency.js";
import { estimateStructuredEmbeddingInputBytes } from "./embedding-input-limits.js";
import { buildTextEmbeddingInput, type EmbeddingInput } from "./embedding-inputs.js";
import { isFileMissingError } from "./fs-utils.js";
import {
buildMemoryMultimodalLabel,
classifyMemoryMultimodalPath,
type MemoryMultimodalModality,
type MemoryMultimodalSettings,
} from "./multimodal.js";
export { hashText } from "./hash.js";
import { hashText } from "./hash.js";
export type MemoryFileEntry = {
path: string;
absPath: string;
mtimeMs: number;
size: number;
hash: string;
dataHash?: string;
kind?: "markdown" | "multimodal";
contentText?: string;
modality?: MemoryMultimodalModality;
mimeType?: string;
};
export type MemoryChunk = {
startLine: number;
endLine: number;
text: string;
hash: string;
embeddingInput?: EmbeddingInput;
};
export type MultimodalMemoryChunk = {
chunk: MemoryChunk;
structuredInputBytes: number;
};
const DISABLED_MULTIMODAL_SETTINGS: MemoryMultimodalSettings = {
enabled: false,
modalities: [],
maxFileBytes: 0,
};
export function ensureDir(dir: string): string {
try {
fsSync.mkdirSync(dir, { recursive: true });
} catch {}
return dir;
}
export function normalizeRelPath(value: string): string {
const trimmed = value.trim().replace(/^[./]+/, "");
return trimmed.replace(/\\/g, "/");
}
export function normalizeExtraMemoryPaths(workspaceDir: string, extraPaths?: string[]): string[] {
if (!extraPaths?.length) {
return [];
}
const resolved = extraPaths
.map((value) => value.trim())
.filter(Boolean)
.map((value) =>
path.isAbsolute(value) ? path.resolve(value) : path.resolve(workspaceDir, value),
);
return Array.from(new Set(resolved));
}
export function isMemoryPath(relPath: string): boolean {
const normalized = normalizeRelPath(relPath);
if (!normalized) {
return false;
}
if (normalized === CANONICAL_ROOT_MEMORY_FILENAME || normalized === "DREAMS.md") {
return true;
}
return normalized.startsWith("memory/");
}
function isAllowedMemoryFilePath(filePath: string, multimodal?: MemoryMultimodalSettings): boolean {
if (filePath.endsWith(".md")) {
return true;
}
return (
classifyMemoryMultimodalPath(filePath, multimodal ?? DISABLED_MULTIMODAL_SETTINGS) !== null
);
}
async function walkDir(
dir: string,
files: string[],
multimodal?: MemoryMultimodalSettings,
shouldSkipPath?: (absPath: string) => boolean,
) {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const full = path.join(dir, entry.name);
if (shouldSkipPath?.(full)) {
continue;
}
if (entry.isSymbolicLink()) {
continue;
}
if (entry.isDirectory()) {
if (entry.name === ".openclaw-repair") {
continue;
}
await walkDir(full, files, multimodal, shouldSkipPath);
continue;
}
if (!entry.isFile()) {
continue;
}
if (!isAllowedMemoryFilePath(full, multimodal)) {
continue;
}
files.push(full);
}
}
export async function listMemoryFiles(
workspaceDir: string,
extraPaths?: string[],
multimodal?: MemoryMultimodalSettings,
): Promise<string[]> {
const result: string[] = [];
const memoryDir = path.join(workspaceDir, "memory");
const shouldSkipWorkspaceMemoryPath = (absPath: string): boolean =>
shouldSkipRootMemoryAuxiliaryPath({ workspaceDir, absPath });
const addMarkdownFile = async (absPath: string) => {
try {
const stat = await fs.lstat(absPath);
if (stat.isSymbolicLink() || !stat.isFile()) {
return;
}
if (!absPath.endsWith(".md")) {
return;
}
result.push(absPath);
} catch {}
};
const memoryFile = await resolveCanonicalRootMemoryFile(workspaceDir);
if (memoryFile) {
await addMarkdownFile(memoryFile);
}
try {
const dirStat = await fs.lstat(memoryDir);
if (!dirStat.isSymbolicLink() && dirStat.isDirectory()) {
await walkDir(memoryDir, result, multimodal, shouldSkipWorkspaceMemoryPath);
}
} catch {}
const normalizedExtraPaths = normalizeExtraMemoryPaths(workspaceDir, extraPaths);
if (normalizedExtraPaths.length > 0) {
for (const inputPath of normalizedExtraPaths) {
if (shouldSkipWorkspaceMemoryPath(inputPath)) {
continue;
}
try {
const stat = await fs.lstat(inputPath);
if (stat.isSymbolicLink()) {
continue;
}
if (stat.isDirectory()) {
await walkDir(inputPath, result, multimodal, shouldSkipWorkspaceMemoryPath);
continue;
}
if (stat.isFile() && isAllowedMemoryFilePath(inputPath, multimodal)) {
result.push(inputPath);
}
} catch {}
}
}
if (result.length <= 1) {
return result;
}
const seen = new Set<string>();
const deduped: string[] = [];
for (const entry of result) {
let key = entry;
try {
key = await fs.realpath(entry);
} catch {}
if (seen.has(key)) {
continue;
}
seen.add(key);
deduped.push(entry);
}
return deduped;
}
export async function buildFileEntry(
absPath: string,
workspaceDir: string,
multimodal?: MemoryMultimodalSettings,
): Promise<MemoryFileEntry | null> {
let stat;
try {
stat = await fs.stat(absPath);
} catch (err) {
if (isFileMissingError(err)) {
return null;
}
throw err;
}
const normalizedPath = path.relative(workspaceDir, absPath).replace(/\\/g, "/");
const multimodalSettings = multimodal ?? DISABLED_MULTIMODAL_SETTINGS;
const modality = classifyMemoryMultimodalPath(absPath, multimodalSettings);
if (modality) {
if (stat.size > multimodalSettings.maxFileBytes) {
return null;
}
let buffer: Buffer;
try {
buffer = await fs.readFile(absPath);
} catch (err) {
if (isFileMissingError(err)) {
return null;
}
throw err;
}
const mimeType = await detectMime({ buffer: buffer.subarray(0, 512), filePath: absPath });
if (!mimeType || !mimeType.startsWith(`${modality}/`)) {
return null;
}
const contentText = buildMemoryMultimodalLabel(modality, normalizedPath);
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
const chunkHash = hashText(
JSON.stringify({
path: normalizedPath,
contentText,
mimeType,
dataHash,
}),
);
return {
path: normalizedPath,
absPath,
mtimeMs: stat.mtimeMs,
size: stat.size,
hash: chunkHash,
dataHash,
kind: "multimodal",
contentText,
modality,
mimeType,
};
}
let content: string;
try {
content = await fs.readFile(absPath, "utf-8");
} catch (err) {
if (isFileMissingError(err)) {
return null;
}
throw err;
}
const hash = hashText(content);
return {
path: normalizedPath,
absPath,
mtimeMs: stat.mtimeMs,
size: stat.size,
hash,
kind: "markdown",
};
}
async function loadMultimodalEmbeddingInput(
entry: Pick<
MemoryFileEntry,
"absPath" | "contentText" | "mimeType" | "kind" | "size" | "dataHash"
>,
): Promise<EmbeddingInput | null> {
if (entry.kind !== "multimodal" || !entry.contentText || !entry.mimeType) {
return null;
}
let stat;
try {
stat = await fs.stat(entry.absPath);
} catch (err) {
if (isFileMissingError(err)) {
return null;
}
throw err;
}
if (stat.size !== entry.size) {
return null;
}
let buffer: Buffer;
try {
buffer = await fs.readFile(entry.absPath);
} catch (err) {
if (isFileMissingError(err)) {
return null;
}
throw err;
}
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
if (entry.dataHash && entry.dataHash !== dataHash) {
return null;
}
return {
text: entry.contentText,
parts: [
{ type: "text", text: entry.contentText },
{
type: "inline-data",
mimeType: entry.mimeType,
data: buffer.toString("base64"),
},
],
};
}
export async function buildMultimodalChunkForIndexing(
entry: Pick<
MemoryFileEntry,
"absPath" | "contentText" | "mimeType" | "kind" | "hash" | "size" | "dataHash"
>,
): Promise<MultimodalMemoryChunk | null> {
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
if (!embeddingInput) {
return null;
}
return {
chunk: {
startLine: 1,
endLine: 1,
text: entry.contentText ?? embeddingInput.text,
hash: entry.hash,
embeddingInput,
},
structuredInputBytes: estimateStructuredEmbeddingInputBytes(embeddingInput),
};
}
export function chunkMarkdown(
content: string,
chunking: { tokens: number; overlap: number },
): MemoryChunk[] {
const lines = content.split("\n");
if (lines.length === 0) {
return [];
}
const maxChars = Math.max(32, chunking.tokens * CHARS_PER_TOKEN_ESTIMATE);
const overlapChars = Math.max(0, chunking.overlap * CHARS_PER_TOKEN_ESTIMATE);
const chunks: MemoryChunk[] = [];
let current: Array<{ line: string; lineNo: number }> = [];
let currentChars = 0;
const flush = () => {
if (current.length === 0) {
return;
}
const firstEntry = current[0];
const lastEntry = current[current.length - 1];
if (!firstEntry || !lastEntry) {
return;
}
const text = current.map((entry) => entry.line).join("\n");
const startLine = firstEntry.lineNo;
const endLine = lastEntry.lineNo;
chunks.push({
startLine,
endLine,
text,
hash: hashText(text),
embeddingInput: buildTextEmbeddingInput(text),
});
};
const carryOverlap = () => {
if (overlapChars <= 0 || current.length === 0) {
current = [];
currentChars = 0;
return;
}
let acc = 0;
const kept: Array<{ line: string; lineNo: number }> = [];
for (let i = current.length - 1; i >= 0; i -= 1) {
const entry = current[i];
if (!entry) {
continue;
}
acc += estimateStringChars(entry.line) + 1;
kept.unshift(entry);
if (acc >= overlapChars) {
break;
}
}
current = kept;
currentChars = acc;
};
for (let i = 0; i < lines.length; i += 1) {
const line = lines[i] ?? "";
const lineNo = i + 1;
const segments: string[] = [];
if (line.length === 0) {
segments.push("");
} else {
// First pass: slice at maxChars (preserves original behaviour for Latin).
// Second pass: if a segment's *weighted* size still exceeds the budget
// (happens for CJK-heavy text where 1 char ≈ 1 token), re-split it at
// chunking.tokens so the chunk stays within the token budget.
for (let start = 0; start < line.length; start += maxChars) {
const coarse = line.slice(start, start + maxChars);
if (estimateStringChars(coarse) > maxChars) {
const fineStep = Math.max(1, chunking.tokens);
for (let j = 0; j < coarse.length; ) {
let end = Math.min(j + fineStep, coarse.length);
// Avoid splitting inside a UTF-16 surrogate pair (CJK Extension B+).
if (end < coarse.length) {
const code = coarse.charCodeAt(end - 1);
if (code >= 0xd800 && code <= 0xdbff) {
end += 1; // include the low surrogate
}
}
segments.push(coarse.slice(j, end));
j = end; // advance cursor to the adjusted boundary
}
} else {
segments.push(coarse);
}
}
}
for (const segment of segments) {
const lineSize = estimateStringChars(segment) + 1;
if (currentChars + lineSize > maxChars && current.length > 0) {
flush();
carryOverlap();
}
current.push({ line: segment, lineNo });
currentChars += lineSize;
}
}
flush();
return chunks;
}
/**
* Remap chunk startLine/endLine from content-relative positions to original
* source file positions using a lineMap. Each entry in lineMap gives the
* 1-indexed source line for the corresponding 0-indexed content line.
*
* This is used for session JSONL files where buildSessionEntry() flattens
* messages into a plain-text string before chunking. Without remapping the
* stored line numbers would reference positions in the flattened text rather
* than the original JSONL file.
*/
export function remapChunkLines(chunks: MemoryChunk[], lineMap: number[] | undefined): void {
if (!lineMap || lineMap.length === 0) {
return;
}
for (const chunk of chunks) {
// startLine/endLine are 1-indexed; lineMap is 0-indexed by content line
chunk.startLine = lineMap[chunk.startLine - 1] ?? chunk.startLine;
chunk.endLine = lineMap[chunk.endLine - 1] ?? chunk.endLine;
}
}
export function parseEmbedding(raw: string): number[] {
try {
const parsed = JSON.parse(raw) as number[];
return Array.isArray(parsed) ? parsed : [];
} catch {
return [];
}
}
export function cosineSimilarity(a: number[], b: number[]): number {
if (a.length === 0 || b.length === 0) {
return 0;
}
const len = Math.min(a.length, b.length);
let dot = 0;
let normA = 0;
let normB = 0;
for (let i = 0; i < len; i += 1) {
const av = a[i] ?? 0;
const bv = b[i] ?? 0;
dot += av * bv;
normA += av * av;
normB += bv * bv;
}
if (normA === 0 || normB === 0) {
return 0;
}
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
export async function runWithConcurrency<T>(
tasks: Array<() => Promise<T>>,
limit: number,
): Promise<T[]> {
const { results, firstError, hasError } = await runTasksWithConcurrency({
tasks,
limit,
errorMode: "stop",
});
if (hasError) {
throw firstError;
}
return results;
}
export * from "../../../packages/memory-host-sdk/src/host/internal.js";

View File

@@ -1,103 +1 @@
import type { DatabaseSync } from "node:sqlite";
import { formatErrorMessage } from "../../infra/errors.js";
export function ensureMemoryIndexSchema(params: {
db: DatabaseSync;
embeddingCacheTable: string;
cacheEnabled: boolean;
ftsTable: string;
ftsEnabled: boolean;
ftsTokenizer?: "unicode61" | "trigram";
}): { ftsAvailable: boolean; ftsError?: string } {
params.db.exec(`
CREATE TABLE IF NOT EXISTS meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`);
params.db.exec(`
CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY,
source TEXT NOT NULL DEFAULT 'memory',
hash TEXT NOT NULL,
mtime INTEGER NOT NULL,
size INTEGER NOT NULL
);
`);
params.db.exec(`
CREATE TABLE IF NOT EXISTS chunks (
id TEXT PRIMARY KEY,
path TEXT NOT NULL,
source TEXT NOT NULL DEFAULT 'memory',
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL,
hash TEXT NOT NULL,
model TEXT NOT NULL,
text TEXT NOT NULL,
embedding TEXT NOT NULL,
updated_at INTEGER NOT NULL
);
`);
if (params.cacheEnabled) {
params.db.exec(`
CREATE TABLE IF NOT EXISTS ${params.embeddingCacheTable} (
provider TEXT NOT NULL,
model TEXT NOT NULL,
provider_key TEXT NOT NULL,
hash TEXT NOT NULL,
embedding TEXT NOT NULL,
dims INTEGER,
updated_at INTEGER NOT NULL,
PRIMARY KEY (provider, model, provider_key, hash)
);
`);
params.db.exec(
`CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${params.embeddingCacheTable}(updated_at);`,
);
}
let ftsAvailable = false;
let ftsError: string | undefined;
if (params.ftsEnabled) {
try {
const tokenizer = params.ftsTokenizer ?? "unicode61";
const tokenizeClause = tokenizer === "trigram" ? `, tokenize='trigram case_sensitive 0'` : "";
params.db.exec(
`CREATE VIRTUAL TABLE IF NOT EXISTS ${params.ftsTable} USING fts5(\n` +
` text,\n` +
` id UNINDEXED,\n` +
` path UNINDEXED,\n` +
` source UNINDEXED,\n` +
` model UNINDEXED,\n` +
` start_line UNINDEXED,\n` +
` end_line UNINDEXED\n` +
`${tokenizeClause});`,
);
ftsAvailable = true;
} catch (err) {
const message = formatErrorMessage(err);
ftsAvailable = false;
ftsError = message;
}
}
ensureColumn(params.db, "files", "source", "TEXT NOT NULL DEFAULT 'memory'");
ensureColumn(params.db, "chunks", "source", "TEXT NOT NULL DEFAULT 'memory'");
params.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`);
params.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source);`);
return { ftsAvailable, ...(ftsError ? { ftsError } : {}) };
}
function ensureColumn(
db: DatabaseSync,
table: "files" | "chunks",
column: string,
definition: string,
): void {
const rows = db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>;
if (rows.some((row) => row.name === column)) {
return;
}
db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`);
}
export * from "../../../packages/memory-host-sdk/src/host/memory-schema.js";

View File

@@ -4,22 +4,37 @@ import { fileURLToPath } from "node:url";
import { describe, expect, it } from "vitest";
const HOST_DIR = path.dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = path.resolve(HOST_DIR, "../../..");
const PACKAGE_HOST_DIR = path.join(REPO_ROOT, "packages/memory-host-sdk/src/host");
const PACKAGE_COVERED_MIRRORS = [
const PACKAGE_BRIDGE_FILES = [
"backend-config.ts",
"batch-error-utils.ts",
"batch-output.ts",
"batch-status.ts",
"embedding-chunk-limits.ts",
"embeddings-model-normalize.ts",
"embedding-input-limits.ts",
"embeddings-remote-provider.ts",
"embeddings.ts",
"internal.ts",
"memory-schema.ts",
"multimodal.ts",
"qmd-process.ts",
"qmd-scope.ts",
"query-expansion.ts",
"read-file-shared.ts",
"read-file.ts",
"session-files.ts",
"types.ts",
] as const;
describe("memory-host-sdk mirrored host modules", () => {
it("keeps package-covered source mirrors byte-identical", () => {
for (const fileName of PACKAGE_COVERED_MIRRORS) {
const srcSource = fs.readFileSync(path.join(HOST_DIR, fileName), "utf8");
const packageSource = fs.readFileSync(path.join(PACKAGE_HOST_DIR, fileName), "utf8");
expect(srcSource, fileName).toBe(packageSource);
describe("memory-host-sdk host package bridges", () => {
it("keeps package-owned source bridges thin", () => {
for (const fileName of PACKAGE_BRIDGE_FILES) {
const source = fs.readFileSync(path.join(HOST_DIR, fileName), "utf8");
expect(source, fileName).toBe(
`export * from "../../../packages/memory-host-sdk/src/host/${fileName.replace(
/\.ts$/u,
".js",
)}";\n`,
);
}
});
});

View File

@@ -1,108 +1 @@
import {
lowercasePreservingWhitespace,
normalizeLowercaseStringOrEmpty,
} from "../../shared/string-coerce.js";
const MEMORY_MULTIMODAL_SPECS = {
image: {
labelPrefix: "Image file",
extensions: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".heic", ".heif"],
},
audio: {
labelPrefix: "Audio file",
extensions: [".mp3", ".wav", ".ogg", ".opus", ".m4a", ".aac", ".flac"],
},
} as const;
export type MemoryMultimodalModality = keyof typeof MEMORY_MULTIMODAL_SPECS;
export const MEMORY_MULTIMODAL_MODALITIES = Object.keys(
MEMORY_MULTIMODAL_SPECS,
) as MemoryMultimodalModality[];
export type MemoryMultimodalSelection = MemoryMultimodalModality | "all";
export type MemoryMultimodalSettings = {
enabled: boolean;
modalities: MemoryMultimodalModality[];
maxFileBytes: number;
};
export const DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES = 10 * 1024 * 1024;
export function normalizeMemoryMultimodalModalities(
raw: MemoryMultimodalSelection[] | undefined,
): MemoryMultimodalModality[] {
if (raw === undefined || raw.includes("all")) {
return [...MEMORY_MULTIMODAL_MODALITIES];
}
const normalized = new Set<MemoryMultimodalModality>();
for (const value of raw) {
if (value === "image" || value === "audio") {
normalized.add(value);
}
}
return Array.from(normalized);
}
export function normalizeMemoryMultimodalSettings(raw: {
enabled?: boolean;
modalities?: MemoryMultimodalSelection[];
maxFileBytes?: number;
}): MemoryMultimodalSettings {
const enabled = raw.enabled === true;
const maxFileBytes =
typeof raw.maxFileBytes === "number" && Number.isFinite(raw.maxFileBytes)
? Math.max(1, Math.floor(raw.maxFileBytes))
: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES;
return {
enabled,
modalities: enabled ? normalizeMemoryMultimodalModalities(raw.modalities) : [],
maxFileBytes,
};
}
export function isMemoryMultimodalEnabled(settings: MemoryMultimodalSettings): boolean {
return settings.enabled && settings.modalities.length > 0;
}
export function getMemoryMultimodalExtensions(
modality: MemoryMultimodalModality,
): readonly string[] {
return MEMORY_MULTIMODAL_SPECS[modality].extensions;
}
export function buildMemoryMultimodalLabel(
modality: MemoryMultimodalModality,
normalizedPath: string,
): string {
return `${MEMORY_MULTIMODAL_SPECS[modality].labelPrefix}: ${normalizedPath}`;
}
export function buildCaseInsensitiveExtensionGlob(extension: string): string {
const normalized = normalizeLowercaseStringOrEmpty(extension).replace(/^\./, "");
if (!normalized) {
return "*";
}
const parts = Array.from(normalized, (char) => {
const lower = lowercasePreservingWhitespace(char);
return `[${lower}${char.toUpperCase()}]`;
});
return `*.${parts.join("")}`;
}
export function classifyMemoryMultimodalPath(
filePath: string,
settings: MemoryMultimodalSettings,
): MemoryMultimodalModality | null {
if (!isMemoryMultimodalEnabled(settings)) {
return null;
}
const lower = normalizeLowercaseStringOrEmpty(filePath);
for (const modality of settings.modalities) {
for (const extension of getMemoryMultimodalExtensions(modality)) {
if (lower.endsWith(extension)) {
return modality;
}
}
}
return null;
}
export * from "../../../packages/memory-host-sdk/src/host/multimodal.js";

View File

@@ -1,184 +1 @@
import { spawn } from "node:child_process";
import {
materializeWindowsSpawnProgram,
resolveWindowsSpawnProgram,
} from "../../plugin-sdk/windows-spawn.js";
export type CliSpawnInvocation = {
command: string;
argv: string[];
shell?: boolean;
windowsHide?: boolean;
};
export type QmdBinaryAvailability = {
available: boolean;
error?: string;
};
export function resolveCliSpawnInvocation(params: {
command: string;
args: string[];
env: NodeJS.ProcessEnv;
packageName: string;
}): CliSpawnInvocation {
const program = resolveWindowsSpawnProgram({
command: params.command,
platform: process.platform,
env: params.env,
execPath: process.execPath,
packageName: params.packageName,
allowShellFallback: false,
});
return materializeWindowsSpawnProgram(program, params.args);
}
export async function checkQmdBinaryAvailability(params: {
command: string;
env: NodeJS.ProcessEnv;
cwd?: string;
timeoutMs?: number;
}): Promise<QmdBinaryAvailability> {
let spawnInvocation: CliSpawnInvocation;
try {
spawnInvocation = resolveCliSpawnInvocation({
command: params.command,
args: [],
env: params.env,
packageName: "qmd",
});
} catch (err) {
return { available: false, error: formatQmdAvailabilityError(err) };
}
return await new Promise((resolve) => {
let settled = false;
let didSpawn = false;
const finish = (result: QmdBinaryAvailability) => {
if (settled) {
return;
}
settled = true;
if (timer) {
clearTimeout(timer);
}
resolve(result);
};
const child = spawn(spawnInvocation.command, spawnInvocation.argv, {
env: params.env,
cwd: params.cwd ?? process.cwd(),
shell: spawnInvocation.shell,
windowsHide: spawnInvocation.windowsHide,
stdio: "ignore",
});
const timer = setTimeout(() => {
child.kill("SIGKILL");
finish({
available: false,
error: `spawn ${params.command} timed out after ${params.timeoutMs ?? 2_000}ms`,
});
}, params.timeoutMs ?? 2_000);
child.once("error", (err) => {
finish({ available: false, error: formatQmdAvailabilityError(err) });
});
child.once("spawn", () => {
didSpawn = true;
child.kill();
finish({ available: true });
});
child.once("close", () => {
if (!didSpawn) {
return;
}
finish({ available: true });
});
});
}
export async function runCliCommand(params: {
commandSummary: string;
spawnInvocation: CliSpawnInvocation;
env: NodeJS.ProcessEnv;
cwd: string;
timeoutMs?: number;
maxOutputChars: number;
discardStdout?: boolean;
}): Promise<{ stdout: string; stderr: string }> {
return await new Promise((resolve, reject) => {
const child = spawn(params.spawnInvocation.command, params.spawnInvocation.argv, {
env: params.env,
cwd: params.cwd,
shell: params.spawnInvocation.shell,
windowsHide: params.spawnInvocation.windowsHide,
});
let stdout = "";
let stderr = "";
let stdoutTruncated = false;
let stderrTruncated = false;
const discardStdout = params.discardStdout === true;
const timer = params.timeoutMs
? setTimeout(() => {
child.kill("SIGKILL");
reject(new Error(`${params.commandSummary} timed out after ${params.timeoutMs}ms`));
}, params.timeoutMs)
: null;
child.stdout.on("data", (data) => {
if (discardStdout) {
return;
}
const next = appendOutputWithCap(stdout, data.toString("utf8"), params.maxOutputChars);
stdout = next.text;
stdoutTruncated = stdoutTruncated || next.truncated;
});
child.stderr.on("data", (data) => {
const next = appendOutputWithCap(stderr, data.toString("utf8"), params.maxOutputChars);
stderr = next.text;
stderrTruncated = stderrTruncated || next.truncated;
});
child.on("error", (err) => {
if (timer) {
clearTimeout(timer);
}
reject(err);
});
child.on("close", (code) => {
if (timer) {
clearTimeout(timer);
}
if (!discardStdout && (stdoutTruncated || stderrTruncated)) {
reject(
new Error(
`${params.commandSummary} produced too much output (limit ${params.maxOutputChars} chars)`,
),
);
return;
}
if (code === 0) {
resolve({ stdout, stderr });
} else {
reject(new Error(`${params.commandSummary} failed (code ${code}): ${stderr || stdout}`));
}
});
});
}
function appendOutputWithCap(
current: string,
chunk: string,
maxChars: number,
): { text: string; truncated: boolean } {
const appended = current + chunk;
if (appended.length <= maxChars) {
return { text: appended, truncated: false };
}
return { text: appended.slice(-maxChars), truncated: true };
}
function formatQmdAvailabilityError(err: unknown): string {
if (err instanceof Error && err.message) {
return err.message;
}
return String(err);
}
export * from "../../../packages/memory-host-sdk/src/host/qmd-process.js";

View File

@@ -1,110 +1 @@
import { parseAgentSessionKey } from "../../sessions/session-key-utils.js";
import {
normalizeLowercaseStringOrEmpty,
normalizeOptionalLowercaseString,
} from "../../shared/string-coerce.js";
import type { ResolvedQmdConfig } from "./backend-config.js";
type ParsedQmdSessionScope = {
channel?: string;
chatType?: "channel" | "group" | "direct";
normalizedKey?: string;
};
export function isQmdScopeAllowed(scope: ResolvedQmdConfig["scope"], sessionKey?: string): boolean {
if (!scope) {
return true;
}
const parsed = parseQmdSessionScope(sessionKey);
const channel = parsed.channel;
const chatType = parsed.chatType;
const normalizedKey = parsed.normalizedKey ?? "";
const rawKey = normalizeLowercaseStringOrEmpty(sessionKey);
for (const rule of scope.rules ?? []) {
if (!rule) {
continue;
}
const match = rule.match ?? {};
if (match.channel && match.channel !== channel) {
continue;
}
if (match.chatType && match.chatType !== chatType) {
continue;
}
const normalizedPrefix = normalizeOptionalLowercaseString(match.keyPrefix);
const rawPrefix = normalizeOptionalLowercaseString(match.rawKeyPrefix);
if (rawPrefix && !rawKey.startsWith(rawPrefix)) {
continue;
}
if (normalizedPrefix) {
// Backward compat: older configs used `keyPrefix: "agent:<id>:..."` to match raw keys.
const isLegacyRaw = normalizedPrefix.startsWith("agent:");
if (isLegacyRaw) {
if (!rawKey.startsWith(normalizedPrefix)) {
continue;
}
} else if (!normalizedKey.startsWith(normalizedPrefix)) {
continue;
}
}
return rule.action === "allow";
}
const fallback = scope.default ?? "allow";
return fallback === "allow";
}
export function deriveQmdScopeChannel(key?: string): string | undefined {
return parseQmdSessionScope(key).channel;
}
export function deriveQmdScopeChatType(key?: string): "channel" | "group" | "direct" | undefined {
return parseQmdSessionScope(key).chatType;
}
function parseQmdSessionScope(key?: string): ParsedQmdSessionScope {
const normalized = normalizeQmdSessionKey(key);
if (!normalized) {
return {};
}
const parts = normalized.split(":").filter(Boolean);
let chatType: ParsedQmdSessionScope["chatType"];
if (
parts.length >= 2 &&
(parts[1] === "group" || parts[1] === "channel" || parts[1] === "direct" || parts[1] === "dm")
) {
if (parts.includes("group")) {
chatType = "group";
} else if (parts.includes("channel")) {
chatType = "channel";
}
return {
normalizedKey: normalized,
channel: normalizeOptionalLowercaseString(parts[0]),
chatType: chatType ?? "direct",
};
}
if (normalized.includes(":group:")) {
return { normalizedKey: normalized, chatType: "group" };
}
if (normalized.includes(":channel:")) {
return { normalizedKey: normalized, chatType: "channel" };
}
return { normalizedKey: normalized, chatType: "direct" };
}
function normalizeQmdSessionKey(key?: string): string | undefined {
if (!key) {
return undefined;
}
const trimmed = key.trim();
if (!trimmed) {
return undefined;
}
const parsed = parseAgentSessionKey(trimmed);
const normalized = normalizeLowercaseStringOrEmpty(parsed?.rest ?? trimmed);
if (normalized.startsWith("subagent:")) {
return undefined;
}
return normalized;
}
export * from "../../../packages/memory-host-sdk/src/host/qmd-scope.js";

View File

@@ -1,830 +1 @@
/**
* Query expansion for FTS-only search mode.
*
* When no embedding provider is available, we fall back to FTS (full-text search).
* FTS works best with specific keywords, but users often ask conversational queries
* like "that thing we discussed yesterday" or "之前讨论的那个方案".
*
* This module extracts meaningful keywords from such queries to improve FTS results.
*/
import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js";
// Common stop words that don't add search value
const STOP_WORDS_EN = new Set([
// Articles and determiners
"a",
"an",
"the",
"this",
"that",
"these",
"those",
// Pronouns
"i",
"me",
"my",
"we",
"our",
"you",
"your",
"he",
"she",
"it",
"they",
"them",
// Common verbs
"is",
"are",
"was",
"were",
"be",
"been",
"being",
"have",
"has",
"had",
"do",
"does",
"did",
"will",
"would",
"could",
"should",
"can",
"may",
"might",
// Prepositions
"in",
"on",
"at",
"to",
"for",
"of",
"with",
"by",
"from",
"about",
"into",
"through",
"during",
"before",
"after",
"above",
"below",
"between",
"under",
"over",
// Conjunctions
"and",
"or",
"but",
"if",
"then",
"because",
"as",
"while",
"when",
"where",
"what",
"which",
"who",
"how",
"why",
// Time references (vague, not useful for FTS)
"yesterday",
"today",
"tomorrow",
"earlier",
"later",
"recently",
"before",
"ago",
"just",
"now",
// Vague references
"thing",
"things",
"stuff",
"something",
"anything",
"everything",
"nothing",
// Question words
"please",
"help",
"find",
"show",
"get",
"tell",
"give",
]);
const STOP_WORDS_ES = new Set([
// Articles and determiners
"el",
"la",
"los",
"las",
"un",
"una",
"unos",
"unas",
"este",
"esta",
"ese",
"esa",
// Pronouns
"yo",
"me",
"mi",
"nosotros",
"nosotras",
"tu",
"tus",
"usted",
"ustedes",
"ellos",
"ellas",
// Prepositions and conjunctions
"de",
"del",
"a",
"en",
"con",
"por",
"para",
"sobre",
"entre",
"y",
"o",
"pero",
"si",
"porque",
"como",
// Common verbs / auxiliaries
"es",
"son",
"fue",
"fueron",
"ser",
"estar",
"haber",
"tener",
"hacer",
// Time references (vague)
"ayer",
"hoy",
"mañana",
"antes",
"despues",
"después",
"ahora",
"recientemente",
// Question/request words
"que",
"qué",
"cómo",
"cuando",
"cuándo",
"donde",
"dónde",
"porqué",
"favor",
"ayuda",
]);
const STOP_WORDS_PT = new Set([
// Articles and determiners
"o",
"a",
"os",
"as",
"um",
"uma",
"uns",
"umas",
"este",
"esta",
"esse",
"essa",
// Pronouns
"eu",
"me",
"meu",
"minha",
"nos",
"nós",
"você",
"vocês",
"ele",
"ela",
"eles",
"elas",
// Prepositions and conjunctions
"de",
"do",
"da",
"em",
"com",
"por",
"para",
"sobre",
"entre",
"e",
"ou",
"mas",
"se",
"porque",
"como",
// Common verbs / auxiliaries
"é",
"são",
"foi",
"foram",
"ser",
"estar",
"ter",
"fazer",
// Time references (vague)
"ontem",
"hoje",
"amanhã",
"antes",
"depois",
"agora",
"recentemente",
// Question/request words
"que",
"quê",
"quando",
"onde",
"porquê",
"favor",
"ajuda",
]);
const STOP_WORDS_AR = new Set([
// Articles and connectors
"ال",
"و",
"أو",
"لكن",
"ثم",
"بل",
// Pronouns / references
"أنا",
"نحن",
"هو",
"هي",
"هم",
"هذا",
"هذه",
"ذلك",
"تلك",
"هنا",
"هناك",
// Common prepositions
"من",
"إلى",
"الى",
"في",
"على",
"عن",
"مع",
"بين",
"ل",
"ب",
"ك",
// Common auxiliaries / vague verbs
"كان",
"كانت",
"يكون",
"تكون",
"صار",
"أصبح",
"يمكن",
"ممكن",
// Time references (vague)
"بالأمس",
"امس",
"اليوم",
"غدا",
"الآن",
"قبل",
"بعد",
"مؤخرا",
// Question/request words
"لماذا",
"كيف",
"ماذا",
"متى",
"أين",
"هل",
"من فضلك",
"فضلا",
"ساعد",
]);
const STOP_WORDS_KO = new Set([
// Particles (조사)
"은",
"는",
"이",
"가",
"을",
"를",
"의",
"에",
"에서",
"로",
"으로",
"와",
"과",
"도",
"만",
"까지",
"부터",
"한테",
"에게",
"께",
"처럼",
"같이",
"보다",
"마다",
"밖에",
"대로",
// Pronouns (대명사)
"나",
"나는",
"내가",
"나를",
"너",
"우리",
"저",
"저희",
"그",
"그녀",
"그들",
"이것",
"저것",
"그것",
"여기",
"저기",
"거기",
// Common verbs / auxiliaries (일반 동사/보조 동사)
"있다",
"없다",
"하다",
"되다",
"이다",
"아니다",
"보다",
"주다",
"오다",
"가다",
// Nouns (의존 명사 / vague)
"것",
"거",
"등",
"수",
"때",
"곳",
"중",
"분",
// Adverbs
"잘",
"더",
"또",
"매우",
"정말",
"아주",
"많이",
"너무",
"좀",
// Conjunctions
"그리고",
"하지만",
"그래서",
"그런데",
"그러나",
"또는",
"그러면",
// Question words
"왜",
"어떻게",
"뭐",
"언제",
"어디",
"누구",
"무엇",
"어떤",
// Time (vague)
"어제",
"오늘",
"내일",
"최근",
"지금",
"아까",
"나중",
"전에",
// Request words
"제발",
"부탁",
]);
// Common Korean trailing particles to strip from words for tokenization
// Sorted by descending length so longest-match-first is guaranteed.
const KO_TRAILING_PARTICLES = [
"에서",
"으로",
"에게",
"한테",
"처럼",
"같이",
"보다",
"까지",
"부터",
"마다",
"밖에",
"대로",
"은",
"는",
"이",
"가",
"을",
"를",
"의",
"에",
"로",
"와",
"과",
"도",
"만",
].toSorted((a, b) => b.length - a.length);
function stripKoreanTrailingParticle(token: string): string | null {
for (const particle of KO_TRAILING_PARTICLES) {
if (token.length > particle.length && token.endsWith(particle)) {
return token.slice(0, -particle.length);
}
}
return null;
}
function isUsefulKoreanStem(stem: string): boolean {
// Prevent bogus one-syllable stems from words like "논의" -> "논".
if (/[\uac00-\ud7af]/.test(stem)) {
return stem.length >= 2;
}
// Keep stripped ASCII stems for mixed tokens like "API를" -> "api".
return /^[a-z0-9_]+$/i.test(stem);
}
const STOP_WORDS_JA = new Set([
// Pronouns and references
"これ",
"それ",
"あれ",
"この",
"その",
"あの",
"ここ",
"そこ",
"あそこ",
// Common auxiliaries / vague verbs
"する",
"した",
"して",
"です",
"ます",
"いる",
"ある",
"なる",
"できる",
// Particles / connectors
"の",
"こと",
"もの",
"ため",
"そして",
"しかし",
"また",
"でも",
"から",
"まで",
"より",
"だけ",
// Question words
"なぜ",
"どう",
"何",
"いつ",
"どこ",
"誰",
"どれ",
// Time (vague)
"昨日",
"今日",
"明日",
"最近",
"今",
"さっき",
"前",
"後",
]);
const STOP_WORDS_ZH = new Set([
// Pronouns
"我",
"我们",
"你",
"你们",
"他",
"她",
"它",
"他们",
"这",
"那",
"这个",
"那个",
"这些",
"那些",
// Auxiliary words
"的",
"了",
"着",
"过",
"得",
"地",
"吗",
"呢",
"吧",
"啊",
"呀",
"嘛",
"啦",
// Verbs (common, vague)
"是",
"有",
"在",
"被",
"把",
"给",
"让",
"用",
"到",
"去",
"来",
"做",
"说",
"看",
"找",
"想",
"要",
"能",
"会",
"可以",
// Prepositions and conjunctions
"和",
"与",
"或",
"但",
"但是",
"因为",
"所以",
"如果",
"虽然",
"而",
"也",
"都",
"就",
"还",
"又",
"再",
"才",
"只",
// Time (vague)
"之前",
"以前",
"之后",
"以后",
"刚才",
"现在",
"昨天",
"今天",
"明天",
"最近",
// Vague references
"东西",
"事情",
"事",
"什么",
"哪个",
"哪些",
"怎么",
"为什么",
"多少",
// Question/request words
"请",
"帮",
"帮忙",
"告诉",
]);
export function isQueryStopWordToken(token: string): boolean {
return (
STOP_WORDS_EN.has(token) ||
STOP_WORDS_ES.has(token) ||
STOP_WORDS_PT.has(token) ||
STOP_WORDS_AR.has(token) ||
STOP_WORDS_ZH.has(token) ||
STOP_WORDS_KO.has(token) ||
STOP_WORDS_JA.has(token)
);
}
/**
* Check if a token looks like a meaningful keyword.
* Returns false for short tokens, numbers-only, etc.
*/
function isValidKeyword(token: string): boolean {
if (!token || token.length === 0) {
return false;
}
// Skip very short English words (likely stop words or fragments)
if (/^[a-zA-Z]+$/.test(token) && token.length < 3) {
return false;
}
// Skip pure numbers (not useful for semantic search)
if (/^\d+$/.test(token)) {
return false;
}
// Skip tokens that are all punctuation
if (/^[\p{P}\p{S}]+$/u.test(token)) {
return false;
}
return true;
}
/**
* Simple tokenizer that handles English, Chinese, Korean, and Japanese text.
* For Chinese, we do character-based splitting since we don't have a proper segmenter.
* For English, we split on whitespace and punctuation.
*/
function tokenize(text: string, opts?: { ftsTokenizer?: "unicode61" | "trigram" }): string[] {
const useTrigram = opts?.ftsTokenizer === "trigram";
const tokens: string[] = [];
const normalized = normalizeLowercaseStringOrEmpty(text);
// Split into segments (English words, Chinese character sequences, etc.)
const segments = normalized.split(/[\s\p{P}]+/u).filter(Boolean);
for (const segment of segments) {
// Japanese text often mixes scripts (kanji/kana/ASCII) without spaces.
// Extract script-specific chunks so technical terms like "API" / "バグ" are retained.
if (/[\u3040-\u30ff]/.test(segment)) {
const jpParts =
segment.match(/[a-z0-9_]+|[\u30a0-\u30ffー]+|[\u4e00-\u9fff]+|[\u3040-\u309f]{2,}/g) ?? [];
for (const part of jpParts) {
if (/^[\u4e00-\u9fff]+$/.test(part)) {
tokens.push(part);
if (!useTrigram) {
for (let i = 0; i < part.length - 1; i++) {
tokens.push(part[i] + part[i + 1]);
}
}
} else {
tokens.push(part);
}
}
} else if (/[\u4e00-\u9fff]/.test(segment)) {
// Check if segment contains CJK characters (Chinese)
const chars = Array.from(segment).filter((c) => /[\u4e00-\u9fff]/.test(c));
if (useTrigram) {
// In trigram mode, push the whole contiguous CJK block (mirroring the
// Japanese kanji path). SQLite's trigram FTS requires at least 3 characters
// per query term — individual characters silently return no results.
const block = chars.join("");
if (block.length > 0) {
tokens.push(block);
}
} else {
// Default mode: unigrams + bigrams for phrase matching
tokens.push(...chars);
for (let i = 0; i < chars.length - 1; i++) {
tokens.push(chars[i] + chars[i + 1]);
}
}
} else if (/[\uac00-\ud7af\u3131-\u3163]/.test(segment)) {
// For Korean (Hangul syllables and jamo), keep the word as-is unless it is
// effectively a stop word once trailing particles are removed.
const stem = stripKoreanTrailingParticle(segment);
const stemIsStopWord = stem !== null && STOP_WORDS_KO.has(stem);
if (!STOP_WORDS_KO.has(segment) && !stemIsStopWord) {
tokens.push(segment);
}
// Also emit particle-stripped stems when they are useful keywords.
if (stem && !STOP_WORDS_KO.has(stem) && isUsefulKoreanStem(stem)) {
tokens.push(stem);
}
} else {
// For non-CJK, keep as single token
tokens.push(segment);
}
}
return tokens;
}
/**
* Extract keywords from a conversational query for FTS search.
*
* Examples:
* - "that thing we discussed about the API" → ["discussed", "API"]
* - "之前讨论的那个方案" → ["讨论", "方案"]
* - "what was the solution for the bug" → ["solution", "bug"]
*/
export function extractKeywords(
query: string,
opts?: { ftsTokenizer?: "unicode61" | "trigram" },
): string[] {
const tokens = tokenize(query, opts);
const keywords: string[] = [];
const seen = new Set<string>();
for (const token of tokens) {
// Skip stop words
if (isQueryStopWordToken(token)) {
continue;
}
// Skip invalid keywords
if (!isValidKeyword(token)) {
continue;
}
// Skip duplicates
if (seen.has(token)) {
continue;
}
seen.add(token);
keywords.push(token);
}
return keywords;
}
/**
* Expand a query for FTS search.
* Returns both the original query and extracted keywords for OR-matching.
*
* @param query - User's original query
* @returns Object with original query and extracted keywords
*/
export function expandQueryForFts(
query: string,
opts?: { ftsTokenizer?: "unicode61" | "trigram" },
): {
original: string;
keywords: string[];
expanded: string;
} {
const original = query.trim();
const keywords = extractKeywords(original, opts);
// Build expanded query: original terms OR extracted keywords
// This ensures both exact matches and keyword matches are found
const expanded = keywords.length > 0 ? `${original} OR ${keywords.join(" OR ")}` : original;
return { original, keywords, expanded };
}
/**
* Type for an optional LLM-based query expander.
* Can be provided to enhance keyword extraction with semantic understanding.
*/
export type LlmQueryExpander = (query: string) => Promise<string[]>;
/**
* Expand query with optional LLM assistance.
* Falls back to local extraction if LLM is unavailable or fails.
*/
export async function expandQueryWithLlm(
query: string,
llmExpander?: LlmQueryExpander,
opts?: { ftsTokenizer?: "unicode61" | "trigram" },
): Promise<string[]> {
// If LLM expander is provided, try it first
if (llmExpander) {
try {
const llmKeywords = await llmExpander(query);
if (llmKeywords.length > 0) {
return llmKeywords;
}
} catch {
// LLM failed, fall back to local extraction
}
}
// Fall back to local keyword extraction
return extractKeywords(query, opts);
}
export * from "../../../packages/memory-host-sdk/src/host/query-expansion.js";

View File

@@ -1,107 +1 @@
import fs from "node:fs/promises";
import path from "node:path";
import { resolveAgentContextLimits, resolveAgentWorkspaceDir } from "../../agents/agent-scope.js";
import { resolveMemorySearchConfig } from "../../agents/memory-search.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { isFileMissingError, statRegularFile } from "./fs-utils.js";
import { isMemoryPath, normalizeExtraMemoryPaths } from "./internal.js";
import {
buildMemoryReadResult,
DEFAULT_MEMORY_READ_LINES,
type MemoryReadResult,
} from "./read-file-shared.js";
export async function readMemoryFile(params: {
workspaceDir: string;
extraPaths?: string[];
relPath: string;
from?: number;
lines?: number;
defaultLines?: number;
maxChars?: number;
}): Promise<MemoryReadResult> {
const rawPath = params.relPath.trim();
if (!rawPath) {
throw new Error("path required");
}
const absPath = path.isAbsolute(rawPath)
? path.resolve(rawPath)
: path.resolve(params.workspaceDir, rawPath);
const relPath = path.relative(params.workspaceDir, absPath).replace(/\\/g, "/");
const inWorkspace = relPath.length > 0 && !relPath.startsWith("..") && !path.isAbsolute(relPath);
const allowedWorkspace = inWorkspace && isMemoryPath(relPath);
let allowedAdditional = false;
if (!allowedWorkspace && (params.extraPaths?.length ?? 0) > 0) {
const additionalPaths = normalizeExtraMemoryPaths(params.workspaceDir, params.extraPaths);
for (const additionalPath of additionalPaths) {
try {
const stat = await fs.lstat(additionalPath);
if (stat.isSymbolicLink()) {
continue;
}
if (stat.isDirectory()) {
if (absPath === additionalPath || absPath.startsWith(`${additionalPath}${path.sep}`)) {
allowedAdditional = true;
break;
}
continue;
}
if (stat.isFile() && absPath === additionalPath && absPath.endsWith(".md")) {
allowedAdditional = true;
break;
}
} catch {}
}
}
if (!allowedWorkspace && !allowedAdditional) {
throw new Error("path required");
}
if (!absPath.endsWith(".md")) {
throw new Error("path required");
}
const statResult = await statRegularFile(absPath);
if (statResult.missing) {
return { text: "", path: relPath };
}
let content: string;
try {
content = await fs.readFile(absPath, "utf-8");
} catch (err) {
if (isFileMissingError(err)) {
return { text: "", path: relPath };
}
throw err;
}
return buildMemoryReadResult({
content,
relPath,
from: params.from,
lines: params.lines,
defaultLines: params.defaultLines ?? DEFAULT_MEMORY_READ_LINES,
maxChars: params.maxChars,
suggestReadFallback: allowedWorkspace,
});
}
export async function readAgentMemoryFile(params: {
cfg: OpenClawConfig;
agentId: string;
relPath: string;
from?: number;
lines?: number;
}): Promise<MemoryReadResult> {
const settings = resolveMemorySearchConfig(params.cfg, params.agentId);
if (!settings) {
throw new Error("memory search disabled");
}
const contextLimits = resolveAgentContextLimits(params.cfg, params.agentId);
return await readMemoryFile({
workspaceDir: resolveAgentWorkspaceDir(params.cfg, params.agentId),
extraPaths: settings.extraPaths,
relPath: params.relPath,
from: params.from,
lines: params.lines,
defaultLines: contextLimits?.memoryGetDefaultLines,
maxChars: contextLimits?.memoryGetMaxChars,
});
}
export * from "../../../packages/memory-host-sdk/src/host/read-file.js";

View File

@@ -1,565 +1 @@
import fsSync from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
import { stripInternalRuntimeContext } from "../../agents/internal-runtime-context.js";
import { isHeartbeatUserMessage } from "../../auto-reply/heartbeat-filter.js";
import { HEARTBEAT_PROMPT } from "../../auto-reply/heartbeat.js";
import { stripInboundMetadata } from "../../auto-reply/reply/strip-inbound-meta.js";
import { HEARTBEAT_TOKEN, isSilentReplyPayloadText } from "../../auto-reply/tokens.js";
import {
isCompactionCheckpointTranscriptFileName,
isSessionArchiveArtifactName,
isUsageCountedSessionTranscriptFileName,
} from "../../config/sessions/artifacts.js";
import { resolveSessionTranscriptsDirForAgent } from "../../config/sessions/paths.js";
import { isExecCompletionEvent } from "../../infra/heartbeat-events-filter.js";
import { redactSensitiveText } from "../../logging/redact.js";
import { hasInterSessionUserProvenance } from "../../sessions/input-provenance.js";
import { isCronRunSessionKey } from "../../sessions/session-key-utils.js";
import { hashText } from "./hash.js";
const DREAMING_NARRATIVE_RUN_PREFIX = "dreaming-narrative-";
// Keep the historical one-line-per-message export shape for normal turns, but
// wrap pathological long messages so downstream indexers never ingest a single
// toxic line. Wrapped continuation lines still map back to the same JSONL line.
// This limit applies to content only; the role label adds up to 11 chars.
const SESSION_EXPORT_CONTENT_WRAP_CHARS = 800;
const DIRECT_CRON_PROMPT_RE = /^\[cron:[^\]]+\]\s*/;
export type SessionFileEntry = {
path: string;
absPath: string;
mtimeMs: number;
size: number;
hash: string;
content: string;
/** Maps each content line (0-indexed) to its 1-indexed JSONL source line. */
lineMap: number[];
/** Maps each content line (0-indexed) to epoch ms; 0 means unknown timestamp. */
messageTimestampsMs: number[];
/** True when this transcript belongs to an internal dreaming narrative run. */
generatedByDreamingNarrative?: boolean;
/** True when this transcript belongs to an isolated cron run session. */
generatedByCronRun?: boolean;
};
export type BuildSessionEntryOptions = {
/** Optional preclassification from a caller-managed dreaming transcript lookup. */
generatedByDreamingNarrative?: boolean;
/** Optional preclassification from a caller-managed cron transcript lookup. */
generatedByCronRun?: boolean;
};
export type SessionTranscriptClassification = {
dreamingNarrativeTranscriptPaths: ReadonlySet<string>;
cronRunTranscriptPaths: ReadonlySet<string>;
};
type SessionTranscriptStoreEntry = {
sessionFile?: unknown;
sessionId?: unknown;
};
function shouldSkipTranscriptFileForDreaming(absPath: string): boolean {
const fileName = path.basename(absPath);
return (
isSessionArchiveArtifactName(fileName) || isCompactionCheckpointTranscriptFileName(fileName)
);
}
function isDreamingNarrativeBootstrapRecord(record: unknown): boolean {
if (!record || typeof record !== "object" || Array.isArray(record)) {
return false;
}
const candidate = record as {
type?: unknown;
customType?: unknown;
data?: unknown;
};
if (
candidate.type !== "custom" ||
candidate.customType !== "openclaw:bootstrap-context:full" ||
!candidate.data ||
typeof candidate.data !== "object" ||
Array.isArray(candidate.data)
) {
return false;
}
const runId = (candidate.data as { runId?: unknown }).runId;
return typeof runId === "string" && runId.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
}
function hasDreamingNarrativeRunId(value: unknown): boolean {
return typeof value === "string" && value.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
}
function isDreamingNarrativeGeneratedRecord(record: unknown): boolean {
if (isDreamingNarrativeBootstrapRecord(record)) {
return true;
}
if (!record || typeof record !== "object" || Array.isArray(record)) {
return false;
}
const candidate = record as {
runId?: unknown;
sessionKey?: unknown;
data?: unknown;
};
if (
hasDreamingNarrativeRunId(candidate.runId) ||
hasDreamingNarrativeRunId(candidate.sessionKey)
) {
return true;
}
if (!candidate.data || typeof candidate.data !== "object" || Array.isArray(candidate.data)) {
return false;
}
const nested = candidate.data as {
runId?: unknown;
sessionKey?: unknown;
};
return hasDreamingNarrativeRunId(nested.runId) || hasDreamingNarrativeRunId(nested.sessionKey);
}
function isDreamingNarrativeSessionStoreKey(sessionKey: string): boolean {
const trimmed = sessionKey.trim();
if (!trimmed) {
return false;
}
const firstSeparator = trimmed.indexOf(":");
if (firstSeparator < 0) {
return trimmed.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
}
const secondSeparator = trimmed.indexOf(":", firstSeparator + 1);
const sessionSegment = secondSeparator < 0 ? trimmed : trimmed.slice(secondSeparator + 1);
return sessionSegment.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
}
function normalizeComparablePath(pathname: string): string {
const resolved = path.resolve(pathname);
return process.platform === "win32" ? resolved.toLowerCase() : resolved;
}
export function normalizeSessionTranscriptPathForComparison(pathname: string): string {
return normalizeComparablePath(pathname);
}
function resolveSessionStoreTranscriptPath(
sessionsDir: string,
entry: { sessionFile?: unknown; sessionId?: unknown } | undefined,
): string | null {
if (typeof entry?.sessionFile === "string" && entry.sessionFile.trim().length > 0) {
const sessionFile = entry.sessionFile.trim();
const resolved = path.isAbsolute(sessionFile)
? sessionFile
: path.resolve(sessionsDir, sessionFile);
return normalizeComparablePath(resolved);
}
if (typeof entry?.sessionId === "string" && entry.sessionId.trim().length > 0) {
return normalizeComparablePath(path.join(sessionsDir, `${entry.sessionId.trim()}.jsonl`));
}
return null;
}
export function loadDreamingNarrativeTranscriptPathSetForSessionsDir(
sessionsDir: string,
): ReadonlySet<string> {
return loadSessionTranscriptClassificationForSessionsDir(sessionsDir)
.dreamingNarrativeTranscriptPaths;
}
export function loadSessionTranscriptClassificationForSessionsDir(
sessionsDir: string,
): SessionTranscriptClassification {
const storePath = path.join(sessionsDir, "sessions.json");
const store = readSessionTranscriptClassificationStore(storePath);
const dreamingTranscriptPaths = new Set<string>();
const cronRunTranscriptPaths = new Set<string>();
for (const [sessionKey, entry] of Object.entries(store)) {
const transcriptPath = resolveSessionStoreTranscriptPath(sessionsDir, entry);
if (!transcriptPath) {
continue;
}
if (isDreamingNarrativeSessionStoreKey(sessionKey)) {
dreamingTranscriptPaths.add(transcriptPath);
}
if (isCronRunSessionKey(sessionKey)) {
cronRunTranscriptPaths.add(transcriptPath);
}
}
return {
dreamingNarrativeTranscriptPaths: dreamingTranscriptPaths,
cronRunTranscriptPaths,
};
}
function readSessionTranscriptClassificationStore(
storePath: string,
): Record<string, SessionTranscriptStoreEntry> {
try {
const parsed = JSON.parse(fsSync.readFileSync(storePath, "utf-8")) as unknown;
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
return {};
}
return parsed as Record<string, SessionTranscriptStoreEntry>;
} catch {
return {};
}
}
export function loadDreamingNarrativeTranscriptPathSetForAgent(
agentId: string,
): ReadonlySet<string> {
return loadSessionTranscriptClassificationForAgent(agentId).dreamingNarrativeTranscriptPaths;
}
export function loadSessionTranscriptClassificationForAgent(
agentId: string,
): SessionTranscriptClassification {
return loadSessionTranscriptClassificationForSessionsDir(
resolveSessionTranscriptsDirForAgent(agentId),
);
}
function classifySessionTranscriptFromSessionStore(absPath: string): {
generatedByDreamingNarrative: boolean;
generatedByCronRun: boolean;
} {
const sessionsDir = path.dirname(absPath);
const normalizedAbsPath = normalizeComparablePath(absPath);
const classification = loadSessionTranscriptClassificationForSessionsDir(sessionsDir);
return {
generatedByDreamingNarrative:
classification.dreamingNarrativeTranscriptPaths.has(normalizedAbsPath),
generatedByCronRun: classification.cronRunTranscriptPaths.has(normalizedAbsPath),
};
}
export async function listSessionFilesForAgent(agentId: string): Promise<string[]> {
const dir = resolveSessionTranscriptsDirForAgent(agentId);
try {
const entries = await fs.readdir(dir, { withFileTypes: true });
return entries
.filter((entry) => entry.isFile())
.map((entry) => entry.name)
.filter((name) => isUsageCountedSessionTranscriptFileName(name))
.map((name) => path.join(dir, name));
} catch {
return [];
}
}
export function sessionPathForFile(absPath: string): string {
return path.join("sessions", path.basename(absPath)).replace(/\\/g, "/");
}
async function logSessionFileReadFailure(absPath: string, err: unknown): Promise<void> {
const { createSubsystemLogger } = await import("../../logging/subsystem.js");
createSubsystemLogger("memory").debug(`Failed reading session file ${absPath}: ${String(err)}`);
}
function normalizeSessionText(value: string): string {
return value
.replace(/\s*\n+\s*/g, " ")
.replace(/\s+/g, " ")
.trim();
}
function collectRawSessionText(content: unknown): string | null {
if (typeof content === "string") {
return content;
}
if (!Array.isArray(content)) {
return null;
}
const parts: string[] = [];
for (const block of content) {
if (!block || typeof block !== "object") {
continue;
}
const record = block as { type?: unknown; text?: unknown };
if (record.type === "text" && typeof record.text === "string") {
parts.push(record.text);
}
}
return parts.length > 0 ? parts.join("\n") : null;
}
function isHighSurrogate(code: number): boolean {
return code >= 0xd800 && code <= 0xdbff;
}
function isLowSurrogate(code: number): boolean {
return code >= 0xdc00 && code <= 0xdfff;
}
function splitLongSessionLine(
text: string,
maxChars: number = SESSION_EXPORT_CONTENT_WRAP_CHARS,
): string[] {
const normalized = text.trim();
if (!normalized) {
return [];
}
if (normalized.length <= maxChars) {
return [normalized];
}
const segments: string[] = [];
let cursor = 0;
while (cursor < normalized.length) {
const remaining = normalized.length - cursor;
if (remaining <= maxChars) {
segments.push(normalized.slice(cursor).trim());
break;
}
const limit = cursor + maxChars;
let splitAt = limit;
for (let index = limit; index > cursor; index -= 1) {
if (normalized[index] === " ") {
splitAt = index;
break;
}
}
if (
splitAt < normalized.length &&
splitAt > cursor &&
isHighSurrogate(normalized.charCodeAt(splitAt - 1)) &&
isLowSurrogate(normalized.charCodeAt(splitAt))
) {
splitAt -= 1;
}
segments.push(normalized.slice(cursor, splitAt).trim());
cursor = splitAt;
while (cursor < normalized.length && normalized[cursor] === " ") {
cursor += 1;
}
}
return segments.filter(Boolean);
}
function renderSessionExportLines(label: string, text: string): string[] {
return splitLongSessionLine(text).map((segment) => `${label}: ${segment}`);
}
/**
* Strip OpenClaw-injected inbound metadata envelopes from a raw text block.
*
* User-role messages arriving from external channels (Telegram, Discord,
* Slack, …) are stored with a multi-line prefix containing Conversation info,
* Sender info, and other AI-facing metadata blocks. These envelopes must be
* removed BEFORE normalization, because `stripInboundMetadata` relies on
* newline structure and fenced `json` code fences to locate sentinels; once
* `normalizeSessionText` collapses newlines into spaces, stripping is
* impossible.
*
* See: https://github.com/openclaw/openclaw/issues/63921
*/
function stripInboundMetadataForUserRole(text: string, role: "user" | "assistant"): string {
if (role !== "user") {
return text;
}
return stripInboundMetadata(text);
}
const GENERATED_SYSTEM_MESSAGE_RE = /^System(?: \(untrusted\))?: \[[^\]]+\]\s*/;
function isGeneratedSystemWrapperMessage(text: string, role: "user" | "assistant"): boolean {
if (role !== "user") {
return false;
}
return GENERATED_SYSTEM_MESSAGE_RE.test(text);
}
function isGeneratedCronPromptMessage(text: string, role: "user" | "assistant"): boolean {
if (role !== "user") {
return false;
}
return DIRECT_CRON_PROMPT_RE.test(text);
}
function isGeneratedHeartbeatPromptMessage(text: string, role: "user" | "assistant"): boolean {
return role === "user" && isHeartbeatUserMessage({ role, content: text }, HEARTBEAT_PROMPT);
}
function sanitizeSessionText(text: string, role: "user" | "assistant"): string | null {
const strippedInbound = stripInboundMetadataForUserRole(text, role);
const strippedInternal = stripInternalRuntimeContext(strippedInbound);
const normalized = normalizeSessionText(strippedInternal);
if (!normalized) {
return null;
}
if (isGeneratedSystemWrapperMessage(normalized, role)) {
return null;
}
if (isGeneratedCronPromptMessage(normalized, role)) {
return null;
}
if (isGeneratedHeartbeatPromptMessage(normalized, role)) {
return null;
}
if (isSilentReplyPayloadText(normalized)) {
return null;
}
// Assistant-side machinery acks: HEARTBEAT_OK is the canonical "all clear,
// nothing to do" reply to a heartbeat tick. Drop on the assistant side
// directly so we do not have to rely on cross-message coupling with the
// preceding user message (which a real user could spoof).
if (role === "assistant" && normalized === HEARTBEAT_TOKEN) {
return null;
}
const withoutSystemEnvelope = normalized.replace(GENERATED_SYSTEM_MESSAGE_RE, "").trim();
if (isExecCompletionEvent(withoutSystemEnvelope)) {
return null;
}
return normalized;
}
export function extractSessionText(
content: unknown,
role: "user" | "assistant" = "assistant",
): string | null {
const rawText = collectRawSessionText(content);
if (rawText === null) {
return null;
}
return sanitizeSessionText(rawText, role);
}
function parseSessionTimestampMs(
record: { timestamp?: unknown },
message: { timestamp?: unknown },
): number {
const candidates = [message.timestamp, record.timestamp];
for (const value of candidates) {
if (typeof value === "number" && Number.isFinite(value)) {
const ms = value > 0 && value < 1e11 ? value * 1000 : value;
if (Number.isFinite(ms) && ms > 0) {
return ms;
}
}
if (typeof value === "string") {
const parsed = Date.parse(value);
if (Number.isFinite(parsed) && parsed > 0) {
return parsed;
}
}
}
return 0;
}
export async function buildSessionEntry(
absPath: string,
opts: BuildSessionEntryOptions = {},
): Promise<SessionFileEntry | null> {
try {
const stat = await fs.stat(absPath);
if (shouldSkipTranscriptFileForDreaming(absPath)) {
return {
path: sessionPathForFile(absPath),
absPath,
mtimeMs: stat.mtimeMs,
size: stat.size,
hash: hashText("\n\n"),
content: "",
lineMap: [],
messageTimestampsMs: [],
};
}
const raw = await fs.readFile(absPath, "utf-8");
const lines = raw.split("\n");
const collected: string[] = [];
const lineMap: number[] = [];
const messageTimestampsMs: number[] = [];
const sessionStoreClassification =
opts.generatedByDreamingNarrative === undefined || opts.generatedByCronRun === undefined
? classifySessionTranscriptFromSessionStore(absPath)
: null;
let generatedByDreamingNarrative =
opts.generatedByDreamingNarrative ??
sessionStoreClassification?.generatedByDreamingNarrative ??
false;
const generatedByCronRun =
opts.generatedByCronRun ?? sessionStoreClassification?.generatedByCronRun ?? false;
for (let jsonlIdx = 0; jsonlIdx < lines.length; jsonlIdx++) {
const line = lines[jsonlIdx];
if (!line.trim()) {
continue;
}
let record: unknown;
try {
record = JSON.parse(line);
} catch {
continue;
}
if (!generatedByDreamingNarrative && isDreamingNarrativeGeneratedRecord(record)) {
generatedByDreamingNarrative = true;
}
if (
!record ||
typeof record !== "object" ||
(record as { type?: unknown }).type !== "message"
) {
continue;
}
const message = (record as { message?: unknown }).message as
| { role?: unknown; content?: unknown; provenance?: unknown }
| undefined;
if (!message || typeof message.role !== "string") {
continue;
}
if (message.role !== "user" && message.role !== "assistant") {
continue;
}
if (message.role === "user" && hasInterSessionUserProvenance(message)) {
continue;
}
const rawText = collectRawSessionText(message.content);
if (rawText === null) {
continue;
}
const text = sanitizeSessionText(rawText, message.role);
if (!text) {
// Assistant-side machinery (silent replies, system wrappers) is already
// dropped by sanitizeSessionText. We deliberately do NOT use the prior
// user message's pattern-match to drop the next assistant message:
// user-typed text can match those same patterns (`[cron:...]`,
// `System (untrusted): ...`) and a cross-message drop would let users
// exfiltrate real assistant replies from the dreaming corpus by
// prefixing their own prompt. See PR #70737 review (aisle-research-bot).
continue;
}
if (generatedByDreamingNarrative || generatedByCronRun) {
continue;
}
const safe = redactSensitiveText(text, { mode: "tools" });
const label = message.role === "user" ? "User" : "Assistant";
const renderedLines = renderSessionExportLines(label, safe);
const timestampMs = parseSessionTimestampMs(
record as { timestamp?: unknown },
message as { timestamp?: unknown },
);
collected.push(...renderedLines);
lineMap.push(...renderedLines.map(() => jsonlIdx + 1));
messageTimestampsMs.push(...renderedLines.map(() => timestampMs));
}
const content = collected.join("\n");
return {
path: sessionPathForFile(absPath),
absPath,
mtimeMs: stat.mtimeMs,
size: stat.size,
hash: hashText(content + "\n" + lineMap.join(",") + "\n" + messageTimestampsMs.join(",")),
content,
lineMap,
messageTimestampsMs,
...(generatedByDreamingNarrative ? { generatedByDreamingNarrative: true } : {}),
...(generatedByCronRun ? { generatedByCronRun: true } : {}),
};
} catch (err) {
void logSessionFileReadFailure(absPath, err);
return null;
}
}
export * from "../../../packages/memory-host-sdk/src/host/session-files.js";

View File

@@ -1,107 +1 @@
export type MemorySource = "memory" | "sessions";
export type MemorySearchResult = {
path: string;
startLine: number;
endLine: number;
score: number;
vectorScore?: number;
textScore?: number;
snippet: string;
source: MemorySource;
citation?: string;
};
export type MemoryEmbeddingProbeResult = {
ok: boolean;
error?: string;
checked?: boolean;
cached?: boolean;
checkedAtMs?: number;
cacheExpiresAtMs?: number;
};
export type MemorySyncProgressUpdate = {
completed: number;
total: number;
label?: string;
};
export type MemorySearchRuntimeDebug = {
backend: "builtin" | "qmd";
configuredMode?: string;
effectiveMode?: string;
fallback?: string;
};
export type MemoryReadResult = {
text: string;
path: string;
truncated?: boolean;
from?: number;
lines?: number;
nextFrom?: number;
};
export type MemoryProviderStatus = {
backend: "builtin" | "qmd";
provider: string;
model?: string;
requestedProvider?: string;
files?: number;
chunks?: number;
dirty?: boolean;
workspaceDir?: string;
dbPath?: string;
extraPaths?: string[];
sources?: MemorySource[];
sourceCounts?: Array<{ source: MemorySource; files: number; chunks: number }>;
cache?: { enabled: boolean; entries?: number; maxEntries?: number };
fts?: { enabled: boolean; available: boolean; error?: string };
fallback?: { from: string; reason?: string };
vector?: {
enabled: boolean;
available?: boolean;
extensionPath?: string;
loadError?: string;
dims?: number;
};
batch?: {
enabled: boolean;
failures: number;
limit: number;
wait: boolean;
concurrency: number;
pollIntervalMs: number;
timeoutMs: number;
lastError?: string;
lastProvider?: string;
};
custom?: Record<string, unknown>;
};
export interface MemorySearchManager {
search(
query: string,
opts?: {
maxResults?: number;
minScore?: number;
sessionKey?: string;
qmdSearchModeOverride?: "query" | "search" | "vsearch";
onDebug?: (debug: MemorySearchRuntimeDebug) => void;
sources?: MemorySource[];
},
): Promise<MemorySearchResult[]>;
readFile(params: { relPath: string; from?: number; lines?: number }): Promise<MemoryReadResult>;
status(): MemoryProviderStatus;
sync?(params?: {
reason?: string;
force?: boolean;
sessionFiles?: string[];
progress?: (update: MemorySyncProgressUpdate) => void;
}): Promise<void>;
getCachedEmbeddingAvailability?(): MemoryEmbeddingProbeResult | null;
probeEmbeddingAvailability(): Promise<MemoryEmbeddingProbeResult>;
probeVectorAvailability(): Promise<boolean>;
close?(): Promise<void>;
}
export * from "../../../packages/memory-host-sdk/src/host/types.js";