diff --git a/packages/memory-host-sdk/src/host/openclaw-runtime-io.ts b/packages/memory-host-sdk/src/host/openclaw-runtime-io.ts index beaea0efc20..cc24b099462 100644 --- a/packages/memory-host-sdk/src/host/openclaw-runtime-io.ts +++ b/packages/memory-host-sdk/src/host/openclaw-runtime-io.ts @@ -1,15 +1,38 @@ export { CHARS_PER_TOKEN_ESTIMATE, + DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES, + DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS, + applyWindowsSpawnProgramPolicy, + configureSqliteWalMaintenance, createSubsystemLogger, detectMime, estimateStringChars, + installProcessWarningFilter, + materializeWindowsSpawnProgram, redactSensitiveText, resolveGlobalSingleton, resolveUserPath, + resolveWindowsExecutablePath, + resolveWindowsSpawnProgram, + resolveWindowsSpawnProgramCandidate, runTasksWithConcurrency, shortenHomeInString, shortenHomePath, + shouldIgnoreWarning, splitShellArgs, truncateUtf16Safe, writeFileWithinRoot, } from "./openclaw-runtime.js"; + +export type { + ProcessWarning, + ResolveWindowsSpawnProgramCandidateParams, + ResolveWindowsSpawnProgramParams, + SqliteWalMaintenance, + SqliteWalMaintenanceOptions, + WindowsSpawnCandidateResolution, + WindowsSpawnInvocation, + WindowsSpawnProgram, + WindowsSpawnProgramCandidate, + WindowsSpawnResolution, +} from "./openclaw-runtime.js"; diff --git a/packages/memory-host-sdk/src/host/openclaw-runtime.ts b/packages/memory-host-sdk/src/host/openclaw-runtime.ts index f8cfdec5acf..4449b1814d0 100644 --- a/packages/memory-host-sdk/src/host/openclaw-runtime.ts +++ b/packages/memory-host-sdk/src/host/openclaw-runtime.ts @@ -78,6 +78,20 @@ export { writeFileWithinRoot } from "../../../../src/infra/fs-safe.js"; export { fetchWithSsrFGuard } from "../../../../src/infra/net/fetch-guard.js"; export { shouldUseEnvHttpProxyForUrl } from "../../../../src/infra/net/proxy-env.js"; export { ssrfPolicyFromHttpBaseUrlAllowedHostname } from "../../../../src/infra/net/ssrf.js"; +export { + DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES, + DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS, + configureSqliteWalMaintenance, +} from "../../../../src/infra/sqlite-wal.js"; +export type { + SqliteWalMaintenance, + SqliteWalMaintenanceOptions, +} from "../../../../src/infra/sqlite-wal.js"; +export { + installProcessWarningFilter, + shouldIgnoreWarning, +} from "../../../../src/infra/warning-filter.js"; +export type { ProcessWarning } from "../../../../src/infra/warning-filter.js"; export { redactSensitiveText } from "../../../../src/logging/redact.js"; export { createSubsystemLogger } from "../../../../src/logging/subsystem.js"; export { detectMime } from "../../../../src/media/mime.js"; @@ -136,4 +150,20 @@ export { shortenHomePath, truncateUtf16Safe, } from "../../../../src/utils.js"; +export { + applyWindowsSpawnProgramPolicy, + materializeWindowsSpawnProgram, + resolveWindowsExecutablePath, + resolveWindowsSpawnProgram, + resolveWindowsSpawnProgramCandidate, +} from "../../../../src/plugin-sdk/windows-spawn.js"; +export type { + ResolveWindowsSpawnProgramCandidateParams, + ResolveWindowsSpawnProgramParams, + WindowsSpawnCandidateResolution, + WindowsSpawnInvocation, + WindowsSpawnProgram, + WindowsSpawnProgramCandidate, + WindowsSpawnResolution, +} from "../../../../src/plugin-sdk/windows-spawn.js"; export { resolveGlobalSingleton } from "../../../../src/shared/global-singleton.js"; diff --git a/packages/memory-host-sdk/src/host/sqlite-wal.ts b/packages/memory-host-sdk/src/host/sqlite-wal.ts index 1d29b1f0ba5..fb721c0ee89 100644 --- a/packages/memory-host-sdk/src/host/sqlite-wal.ts +++ b/packages/memory-host-sdk/src/host/sqlite-wal.ts @@ -1,74 +1,6 @@ -import type { DatabaseSync } from "node:sqlite"; - -const DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES = 1000; -const DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS = 30 * 60 * 1000; - -type IntervalHandle = ReturnType & { - unref?: () => void; -}; - -type SqliteWalCheckpointMode = "PASSIVE" | "FULL" | "RESTART" | "TRUNCATE"; - -export type SqliteWalMaintenance = { - checkpoint: () => boolean; - close: () => boolean; -}; - -export type SqliteWalMaintenanceOptions = { - autoCheckpointPages?: number; - checkpointIntervalMs?: number; - checkpointMode?: SqliteWalCheckpointMode; - onCheckpointError?: (error: unknown) => void; -}; - -function normalizeNonNegativeInteger(value: number, label: string): number { - if (!Number.isInteger(value) || value < 0) { - throw new Error(`${label} must be a non-negative integer`); - } - return value; -} - -export function configureSqliteWalMaintenance( - db: DatabaseSync, - options: SqliteWalMaintenanceOptions = {}, -): SqliteWalMaintenance { - const autoCheckpointPages = normalizeNonNegativeInteger( - options.autoCheckpointPages ?? DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES, - "autoCheckpointPages", - ); - const checkpointIntervalMs = normalizeNonNegativeInteger( - options.checkpointIntervalMs ?? DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS, - "checkpointIntervalMs", - ); - const checkpointMode = options.checkpointMode ?? "TRUNCATE"; - - db.exec("PRAGMA journal_mode = WAL;"); - db.exec(`PRAGMA wal_autocheckpoint = ${autoCheckpointPages};`); - - const checkpoint = (): boolean => { - try { - db.exec(`PRAGMA wal_checkpoint(${checkpointMode});`); - return true; - } catch (error) { - options.onCheckpointError?.(error); - return false; - } - }; - - let timer: IntervalHandle | null = null; - if (checkpointIntervalMs > 0) { - timer = setInterval(checkpoint, checkpointIntervalMs) as IntervalHandle; - timer.unref?.(); - } - - return { - checkpoint, - close: () => { - if (timer) { - clearInterval(timer); - timer = null; - } - return checkpoint(); - }, - }; -} +export { + DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES, + DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS, + configureSqliteWalMaintenance, +} from "./openclaw-runtime-io.js"; +export type { SqliteWalMaintenance, SqliteWalMaintenanceOptions } from "./openclaw-runtime-io.js"; diff --git a/packages/memory-host-sdk/src/host/warning-filter.ts b/packages/memory-host-sdk/src/host/warning-filter.ts index 788459c6e21..7de693f7e86 100644 --- a/packages/memory-host-sdk/src/host/warning-filter.ts +++ b/packages/memory-host-sdk/src/host/warning-filter.ts @@ -1,105 +1,2 @@ -const warningFilterKey = Symbol.for("openclaw.warning-filter"); - -export type ProcessWarning = { - code?: string; - name?: string; - message?: string; -}; - -type ProcessWarningInstallState = { - installed: boolean; -}; - -function resolveWarningFilterState(): ProcessWarningInstallState { - const globalStore = globalThis as Record; - if (Object.prototype.hasOwnProperty.call(globalStore, warningFilterKey)) { - return globalStore[warningFilterKey] as ProcessWarningInstallState; - } - const state = { installed: false }; - globalStore[warningFilterKey] = state; - return state; -} - -export function shouldIgnoreWarning(warning: ProcessWarning): boolean { - if (warning.code === "DEP0040" && warning.message?.includes("punycode")) { - return true; - } - if (warning.code === "DEP0060" && warning.message?.includes("util._extend")) { - return true; - } - if ( - warning.name === "ExperimentalWarning" && - warning.message?.includes("SQLite is an experimental feature") - ) { - return true; - } - return false; -} - -function normalizeWarningArgs(args: unknown[]): ProcessWarning { - const warningArg = args[0]; - const secondArg = args[1]; - const thirdArg = args[2]; - let name: string | undefined; - let code: string | undefined; - let message: string | undefined; - - if (warningArg instanceof Error) { - name = warningArg.name; - message = warningArg.message; - code = (warningArg as Error & { code?: string }).code; - } else if (typeof warningArg === "string") { - message = warningArg; - } - - if (secondArg && typeof secondArg === "object" && !Array.isArray(secondArg)) { - const options = secondArg as { type?: unknown; code?: unknown }; - if (typeof options.type === "string") { - name = options.type; - } - if (typeof options.code === "string") { - code = options.code; - } - } else { - if (typeof secondArg === "string") { - name = secondArg; - } - if (typeof thirdArg === "string") { - code = thirdArg; - } - } - - return { name, code, message }; -} - -export function installProcessWarningFilter(): void { - const state = resolveWarningFilterState(); - if (state.installed) { - return; - } - - const originalEmitWarning = process.emitWarning.bind(process); - const wrappedEmitWarning: typeof process.emitWarning = ((...args: unknown[]) => { - if (shouldIgnoreWarning(normalizeWarningArgs(args))) { - return; - } - if ( - args[0] instanceof Error && - args[1] && - typeof args[1] === "object" && - !Array.isArray(args[1]) - ) { - const warning = args[0]; - const emitted = Object.assign(new Error(warning.message), { - name: warning.name, - code: (warning as Error & { code?: string }).code, - }); - process.emit("warning", emitted); - return; - } - Reflect.apply(originalEmitWarning, process, args); - }) as typeof process.emitWarning; - - process.emitWarning = wrappedEmitWarning; - state.installed = true; -} +export { installProcessWarningFilter, shouldIgnoreWarning } from "./openclaw-runtime-io.js"; +export type { ProcessWarning } from "./openclaw-runtime-io.js"; diff --git a/packages/memory-host-sdk/src/host/windows-spawn.ts b/packages/memory-host-sdk/src/host/windows-spawn.ts index 06acce8cbcf..36d75fbecb9 100644 --- a/packages/memory-host-sdk/src/host/windows-spawn.ts +++ b/packages/memory-host-sdk/src/host/windows-spawn.ts @@ -1,285 +1,9 @@ -import { readFileSync, statSync } from "node:fs"; -import path from "node:path"; -import { normalizeLowercaseStringOrEmpty, normalizeOptionalString } from "./string-utils.js"; - -type WindowsSpawnResolution = "direct" | "node-entrypoint" | "exe-entrypoint" | "shell-fallback"; - -type WindowsSpawnCandidateResolution = Exclude; - -type WindowsSpawnProgramCandidate = { - command: string; - leadingArgv: string[]; - resolution: WindowsSpawnCandidateResolution | "unresolved-wrapper"; - windowsHide?: boolean; -}; - -export type WindowsSpawnProgram = { - command: string; - leadingArgv: string[]; - resolution: WindowsSpawnResolution; - shell?: boolean; - windowsHide?: boolean; -}; - -export type WindowsSpawnInvocation = { - command: string; - argv: string[]; - resolution: WindowsSpawnResolution; - shell?: boolean; - windowsHide?: boolean; -}; - -export type ResolveWindowsSpawnProgramParams = { - command: string; - platform?: NodeJS.Platform; - env?: NodeJS.ProcessEnv; - execPath?: string; - packageName?: string; - allowShellFallback?: boolean; -}; - -function isFilePath(candidate: string): boolean { - try { - return statSync(candidate).isFile(); - } catch { - return false; - } -} - -function resolveWindowsExecutablePath(command: string, env: NodeJS.ProcessEnv): string { - if (command.includes("/") || command.includes("\\") || path.isAbsolute(command)) { - return command; - } - - const pathValue = env.PATH ?? env.Path ?? process.env.PATH ?? process.env.Path ?? ""; - const pathEntries = pathValue - .split(";") - .map((entry) => entry.trim()) - .filter(Boolean); - const hasExtension = path.extname(command).length > 0; - const pathExtRaw = - env.PATHEXT ?? - env.Pathext ?? - process.env.PATHEXT ?? - process.env.Pathext ?? - ".EXE;.CMD;.BAT;.COM"; - const pathExt = hasExtension - ? [""] - : pathExtRaw - .split(";") - .map((ext) => ext.trim()) - .filter(Boolean) - .map((ext) => (ext.startsWith(".") ? ext : `.${ext}`)); - - for (const dir of pathEntries) { - for (const ext of pathExt) { - const normalizedExt = normalizeLowercaseStringOrEmpty(ext); - const uppercaseExt = ext.toUpperCase(); - for (const candidateExt of [ext, normalizedExt, uppercaseExt]) { - const candidate = path.join(dir, `${command}${candidateExt}`); - if (isFilePath(candidate)) { - return candidate; - } - } - } - } - - return command; -} - -function resolveEntrypointFromCmdShim(wrapperPath: string): string | null { - if (!isFilePath(wrapperPath)) { - return null; - } - - try { - const content = readFileSync(wrapperPath, "utf8"); - const candidates: string[] = []; - for (const match of content.matchAll(/"([^"\r\n]*)"/g)) { - const token = match[1] ?? ""; - const relMatch = token.match(/%~?dp0%?\s*[\\/]*(.*)$/i); - const relative = relMatch?.[1]?.trim(); - if (!relative) { - continue; - } - const normalizedRelative = relative.replace(/[\\/]+/g, path.sep).replace(/^[\\/]+/, ""); - const candidate = path.resolve(path.dirname(wrapperPath), normalizedRelative); - if (isFilePath(candidate)) { - candidates.push(candidate); - } - } - const nonNode = candidates.find((candidate) => { - const base = normalizeLowercaseStringOrEmpty(path.basename(candidate)); - return base !== "node.exe" && base !== "node"; - }); - return nonNode ?? null; - } catch { - return null; - } -} - -function resolveBinEntry( - packageName: string | undefined, - binField: string | Record | undefined, -): string | null { - if (typeof binField === "string") { - return normalizeOptionalString(binField) ?? null; - } - if (!binField || typeof binField !== "object") { - return null; - } - if (packageName) { - const preferred = binField[packageName]; - const normalizedPreferred = - typeof preferred === "string" ? normalizeOptionalString(preferred) : undefined; - if (normalizedPreferred) { - return normalizedPreferred; - } - } - for (const value of Object.values(binField)) { - const normalizedValue = typeof value === "string" ? normalizeOptionalString(value) : undefined; - if (normalizedValue) { - return normalizedValue; - } - } - return null; -} - -function resolveEntrypointFromPackageJson( - wrapperPath: string, - packageName?: string, -): string | null { - if (!packageName) { - return null; - } - - const wrapperDir = path.dirname(wrapperPath); - const packageDirs = [ - path.resolve(wrapperDir, "..", packageName), - path.resolve(wrapperDir, "node_modules", packageName), - ]; - - for (const packageDir of packageDirs) { - const packageJsonPath = path.join(packageDir, "package.json"); - if (!isFilePath(packageJsonPath)) { - continue; - } - try { - const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf8")) as { - bin?: string | Record; - }; - const entryRel = resolveBinEntry(packageName, packageJson.bin); - if (!entryRel) { - continue; - } - const entryPath = path.resolve(packageDir, entryRel); - if (isFilePath(entryPath)) { - return entryPath; - } - } catch { - // Ignore malformed package metadata. - } - } - - return null; -} - -function resolveWindowsSpawnProgramCandidate( - params: ResolveWindowsSpawnProgramParams, -): WindowsSpawnProgramCandidate { - const platform = params.platform ?? process.platform; - const env = params.env ?? process.env; - const execPath = params.execPath ?? process.execPath; - - if (platform !== "win32") { - return { - command: params.command, - leadingArgv: [], - resolution: "direct", - }; - } - - const resolvedCommand = resolveWindowsExecutablePath(params.command, env); - const ext = normalizeLowercaseStringOrEmpty(path.extname(resolvedCommand)); - if (ext === ".js" || ext === ".cjs" || ext === ".mjs") { - return { - command: execPath, - leadingArgv: [resolvedCommand], - resolution: "node-entrypoint", - windowsHide: true, - }; - } - - if (ext === ".cmd" || ext === ".bat") { - const entrypoint = - resolveEntrypointFromCmdShim(resolvedCommand) ?? - resolveEntrypointFromPackageJson(resolvedCommand, params.packageName); - if (entrypoint) { - const entryExt = normalizeLowercaseStringOrEmpty(path.extname(entrypoint)); - if (entryExt === ".exe") { - return { - command: entrypoint, - leadingArgv: [], - resolution: "exe-entrypoint", - windowsHide: true, - }; - } - return { - command: execPath, - leadingArgv: [entrypoint], - resolution: "node-entrypoint", - windowsHide: true, - }; - } - - return { - command: resolvedCommand, - leadingArgv: [], - resolution: "unresolved-wrapper", - }; - } - - return { - command: resolvedCommand, - leadingArgv: [], - resolution: "direct", - }; -} - -export function resolveWindowsSpawnProgram( - params: ResolveWindowsSpawnProgramParams, -): WindowsSpawnProgram { - const candidate = resolveWindowsSpawnProgramCandidate(params); - if (candidate.resolution !== "unresolved-wrapper") { - return { - command: candidate.command, - leadingArgv: candidate.leadingArgv, - resolution: candidate.resolution, - windowsHide: candidate.windowsHide, - }; - } - if (params.allowShellFallback === true) { - return { - command: candidate.command, - leadingArgv: [], - resolution: "shell-fallback", - shell: true, - }; - } - throw new Error( - `${path.basename(candidate.command)} wrapper resolved, but no executable/Node entrypoint could be resolved without shell execution.`, - ); -} - -export function materializeWindowsSpawnProgram( - program: WindowsSpawnProgram, - argv: string[], -): WindowsSpawnInvocation { - return { - command: program.command, - argv: [...program.leadingArgv, ...argv], - resolution: program.resolution, - shell: program.shell, - windowsHide: program.windowsHide, - }; -} +export { + materializeWindowsSpawnProgram, + resolveWindowsSpawnProgram, +} from "./openclaw-runtime-io.js"; +export type { + ResolveWindowsSpawnProgramParams, + WindowsSpawnInvocation, + WindowsSpawnProgram, +} from "./openclaw-runtime-io.js"; diff --git a/src/memory-host-sdk/host/backend-config.ts b/src/memory-host-sdk/host/backend-config.ts index efa7780a4d0..db62fe0baa2 100644 --- a/src/memory-host-sdk/host/backend-config.ts +++ b/src/memory-host-sdk/host/backend-config.ts @@ -1,460 +1 @@ -import fs from "node:fs"; -import path from "node:path"; -import { resolveAgentWorkspaceDir } from "../../agents/agent-scope-config.js"; -import { parseDurationMs } from "../../cli/parse-duration.js"; -import type { SessionSendPolicyConfig } from "../../config/types.base.js"; -import type { - MemoryBackend, - MemoryCitationsMode, - MemoryQmdConfig, - MemoryQmdIndexPath, - MemoryQmdMcporterConfig, - MemoryQmdSearchMode, - MemoryQmdStartupMode, -} from "../../config/types.memory.js"; -import type { OpenClawConfig } from "../../config/types.openclaw.js"; -import { CANONICAL_ROOT_MEMORY_FILENAME } from "../../memory/root-memory-files.js"; -import { normalizeAgentId } from "../../routing/session-key.js"; -import { - normalizeLowercaseStringOrEmpty, - normalizeOptionalString, -} from "../../shared/string-coerce.js"; -import { resolveUserPath } from "../../utils.js"; -import { splitShellArgs } from "../../utils/shell-argv.js"; - -export type ResolvedMemoryBackendConfig = { - backend: MemoryBackend; - citations: MemoryCitationsMode; - qmd?: ResolvedQmdConfig; -}; - -export type ResolvedQmdCollection = { - name: string; - path: string; - pattern: string; - kind: "memory" | "custom" | "sessions"; -}; - -export type ResolvedQmdUpdateConfig = { - intervalMs: number; - debounceMs: number; - onBoot: boolean; - startup: MemoryQmdStartupMode; - startupDelayMs: number; - waitForBootSync: boolean; - embedIntervalMs: number; - commandTimeoutMs: number; - updateTimeoutMs: number; - embedTimeoutMs: number; -}; - -export type ResolvedQmdLimitsConfig = { - maxResults: number; - maxSnippetChars: number; - maxInjectedChars: number; - timeoutMs: number; -}; - -export type ResolvedQmdSessionConfig = { - enabled: boolean; - exportDir?: string; - retentionDays?: number; -}; - -export type ResolvedQmdMcporterConfig = { - enabled: boolean; - serverName: string; - startDaemon: boolean; -}; - -export type ResolvedQmdConfig = { - command: string; - mcporter: ResolvedQmdMcporterConfig; - searchMode: MemoryQmdSearchMode; - searchTool?: string; - collections: ResolvedQmdCollection[]; - sessions: ResolvedQmdSessionConfig; - update: ResolvedQmdUpdateConfig; - limits: ResolvedQmdLimitsConfig; - includeDefaultMemory: boolean; - scope?: SessionSendPolicyConfig; -}; - -const DEFAULT_BACKEND: MemoryBackend = "builtin"; -const DEFAULT_CITATIONS: MemoryCitationsMode = "auto"; -const DEFAULT_QMD_INTERVAL = "5m"; -const DEFAULT_QMD_DEBOUNCE_MS = 15_000; -const DEFAULT_QMD_TIMEOUT_MS = 4_000; -// Defaulting to `query` can be extremely slow on CPU-only systems (query expansion + rerank). -// Prefer a faster mode for interactive use; users can opt into `query` for best recall. -const DEFAULT_QMD_SEARCH_MODE: MemoryQmdSearchMode = "search"; -const DEFAULT_QMD_STARTUP: MemoryQmdStartupMode = "off"; -const DEFAULT_QMD_STARTUP_DELAY_MS = 120_000; -const DEFAULT_QMD_EMBED_INTERVAL = "60m"; -const DEFAULT_QMD_COMMAND_TIMEOUT_MS = 30_000; -const DEFAULT_QMD_UPDATE_TIMEOUT_MS = 120_000; -const DEFAULT_QMD_EMBED_TIMEOUT_MS = 120_000; -const DEFAULT_QMD_LIMITS: ResolvedQmdLimitsConfig = { - maxResults: 4, - maxSnippetChars: 450, - maxInjectedChars: 2_200, - timeoutMs: DEFAULT_QMD_TIMEOUT_MS, -}; -const DEFAULT_QMD_MCPORTER: ResolvedQmdMcporterConfig = { - enabled: false, - serverName: "qmd", - startDaemon: true, -}; - -const DEFAULT_QMD_SCOPE: SessionSendPolicyConfig = { - default: "deny", - rules: [ - { - action: "allow", - match: { chatType: "direct" }, - }, - { - action: "allow", - match: { chatType: "channel" }, - }, - ], -}; - -function sanitizeName(input: string): string { - const lower = normalizeLowercaseStringOrEmpty(input).replace(/[^a-z0-9-]+/g, "-"); - const trimmed = lower.replace(/^-+|-+$/g, ""); - return trimmed || "collection"; -} - -function scopeCollectionBase(base: string, agentId: string): string { - return `${base}-${sanitizeName(agentId)}`; -} - -function canonicalizePathForContainment(rawPath: string): string { - const resolved = path.resolve(rawPath); - let current = resolved; - const suffix: string[] = []; - while (true) { - try { - const canonical = path.normalize(fs.realpathSync.native(current)); - return path.normalize(path.join(canonical, ...suffix)); - } catch { - const parent = path.dirname(current); - if (parent === current) { - return path.normalize(resolved); - } - suffix.unshift(path.basename(current)); - current = parent; - } - } -} - -function isPathInsideRoot(candidatePath: string, rootPath: string): boolean { - const relative = path.relative( - canonicalizePathForContainment(rootPath), - canonicalizePathForContainment(candidatePath), - ); - return relative === "" || (!relative.startsWith("..") && !path.isAbsolute(relative)); -} - -function ensureUniqueName(base: string, existing: Set): string { - let name = sanitizeName(base); - if (!existing.has(name)) { - existing.add(name); - return name; - } - let suffix = 2; - while (existing.has(`${name}-${suffix}`)) { - suffix += 1; - } - const unique = `${name}-${suffix}`; - existing.add(unique); - return unique; -} - -function resolvePath(raw: string, workspaceDir: string): string { - const trimmed = raw.trim(); - if (!trimmed) { - throw new Error("path required"); - } - if (trimmed.startsWith("~") || path.isAbsolute(trimmed)) { - return path.normalize(resolveUserPath(trimmed)); - } - return path.normalize(path.resolve(workspaceDir, trimmed)); -} - -function resolveIntervalMs(raw: string | undefined): number { - const value = raw?.trim(); - if (!value) { - return parseDurationMs(DEFAULT_QMD_INTERVAL, { defaultUnit: "m" }); - } - try { - return parseDurationMs(value, { defaultUnit: "m" }); - } catch { - return parseDurationMs(DEFAULT_QMD_INTERVAL, { defaultUnit: "m" }); - } -} - -function resolveEmbedIntervalMs(raw: string | undefined): number { - const value = raw?.trim(); - if (!value) { - return parseDurationMs(DEFAULT_QMD_EMBED_INTERVAL, { defaultUnit: "m" }); - } - try { - return parseDurationMs(value, { defaultUnit: "m" }); - } catch { - return parseDurationMs(DEFAULT_QMD_EMBED_INTERVAL, { defaultUnit: "m" }); - } -} - -function resolveDebounceMs(raw: number | undefined): number { - if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) { - return Math.floor(raw); - } - return DEFAULT_QMD_DEBOUNCE_MS; -} - -function resolveTimeoutMs(raw: number | undefined, fallback: number): number { - if (typeof raw === "number" && Number.isFinite(raw) && raw > 0) { - return Math.floor(raw); - } - return fallback; -} - -function resolveStartupMode(raw: MemoryQmdConfig["update"]): MemoryQmdStartupMode { - const value = raw?.startup; - if (value === "idle" || value === "immediate" || value === "off") { - return value; - } - return DEFAULT_QMD_STARTUP; -} - -function resolveStartupDelayMs(raw: number | undefined): number { - if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) { - return Math.floor(raw); - } - return DEFAULT_QMD_STARTUP_DELAY_MS; -} - -function resolveLimits(raw?: MemoryQmdConfig["limits"]): ResolvedQmdLimitsConfig { - const parsed: ResolvedQmdLimitsConfig = { ...DEFAULT_QMD_LIMITS }; - if (raw?.maxResults && raw.maxResults > 0) { - parsed.maxResults = Math.floor(raw.maxResults); - } - if (raw?.maxSnippetChars && raw.maxSnippetChars > 0) { - parsed.maxSnippetChars = Math.floor(raw.maxSnippetChars); - } - if (raw?.maxInjectedChars && raw.maxInjectedChars > 0) { - parsed.maxInjectedChars = Math.floor(raw.maxInjectedChars); - } - if (raw?.timeoutMs && raw.timeoutMs > 0) { - parsed.timeoutMs = Math.floor(raw.timeoutMs); - } - return parsed; -} - -function resolveSearchMode(raw?: MemoryQmdConfig["searchMode"]): MemoryQmdSearchMode { - if (raw === "search" || raw === "vsearch" || raw === "query") { - return raw; - } - return DEFAULT_QMD_SEARCH_MODE; -} - -function resolveSearchTool(raw?: MemoryQmdConfig["searchTool"]): string | undefined { - const value = raw?.trim(); - return value ? value : undefined; -} - -function resolveSessionConfig( - cfg: MemoryQmdConfig["sessions"], - workspaceDir: string, -): ResolvedQmdSessionConfig { - const enabled = Boolean(cfg?.enabled); - const exportDirRaw = cfg?.exportDir?.trim(); - const exportDir = exportDirRaw ? resolvePath(exportDirRaw, workspaceDir) : undefined; - const retentionDays = - cfg?.retentionDays && cfg.retentionDays > 0 ? Math.floor(cfg.retentionDays) : undefined; - return { - enabled, - exportDir, - retentionDays, - }; -} - -function resolveCustomPaths( - rawPaths: MemoryQmdIndexPath[] | undefined, - workspaceDir: string, - existing: Set, - agentId: string, -): ResolvedQmdCollection[] { - if (!rawPaths?.length) { - return []; - } - const collections: ResolvedQmdCollection[] = []; - const seenRoots = new Set(); - rawPaths.forEach((entry, index) => { - const trimmedPath = normalizeOptionalString(entry?.path); - if (!trimmedPath) { - return; - } - let resolved: string; - try { - resolved = resolvePath(trimmedPath, workspaceDir); - } catch { - return; - } - const pattern = normalizeOptionalString(entry.pattern) || "**/*.md"; - const dedupeKey = `${resolved}\u0000${pattern}`; - if (seenRoots.has(dedupeKey)) { - return; - } - seenRoots.add(dedupeKey); - const explicitName = entry.name?.trim(); - const baseName = - explicitName && !isPathInsideRoot(resolved, workspaceDir) - ? explicitName - : scopeCollectionBase(explicitName || `custom-${index + 1}`, agentId); - const name = ensureUniqueName(baseName, existing); - collections.push({ - name, - path: resolved, - pattern, - kind: "custom", - }); - }); - return collections; -} - -function resolveMcporterConfig(raw?: MemoryQmdMcporterConfig): ResolvedQmdMcporterConfig { - const parsed: ResolvedQmdMcporterConfig = { ...DEFAULT_QMD_MCPORTER }; - if (!raw) { - return parsed; - } - if (raw.enabled !== undefined) { - parsed.enabled = raw.enabled; - } - if (typeof raw.serverName === "string" && raw.serverName.trim()) { - parsed.serverName = raw.serverName.trim(); - } - if (raw.startDaemon !== undefined) { - parsed.startDaemon = raw.startDaemon; - } - // When enabled, default startDaemon to true. - if (parsed.enabled && raw.startDaemon === undefined) { - parsed.startDaemon = true; - } - return parsed; -} - -function resolveDefaultCollections( - include: boolean, - workspaceDir: string, - existing: Set, - agentId: string, -): ResolvedQmdCollection[] { - if (!include) { - return []; - } - const entries: Array<{ path: string; pattern: string; base: string }> = [ - { path: workspaceDir, pattern: CANONICAL_ROOT_MEMORY_FILENAME, base: "memory-root" }, - { path: path.join(workspaceDir, "memory"), pattern: "**/*.md", base: "memory-dir" }, - ]; - return entries.map((entry) => ({ - name: ensureUniqueName(scopeCollectionBase(entry.base, agentId), existing), - path: entry.path, - pattern: entry.pattern, - kind: "memory", - })); -} - -export function resolveMemoryBackendConfig(params: { - cfg: OpenClawConfig; - agentId: string; -}): ResolvedMemoryBackendConfig { - const normalizedAgentId = normalizeAgentId(params.agentId); - const backend = params.cfg.memory?.backend ?? DEFAULT_BACKEND; - const citations = params.cfg.memory?.citations ?? DEFAULT_CITATIONS; - if (backend !== "qmd") { - return { backend: "builtin", citations }; - } - - const workspaceDir = resolveAgentWorkspaceDir(params.cfg, normalizedAgentId); - const qmdCfg = params.cfg.memory?.qmd; - const includeDefaultMemory = qmdCfg?.includeDefaultMemory !== false; - const nameSet = new Set(); - const agentEntry = params.cfg.agents?.list?.find( - (entry) => normalizeAgentId(entry?.id) === normalizedAgentId, - ); - const mergedExtraPaths = [ - ...(params.cfg.agents?.defaults?.memorySearch?.extraPaths ?? []), - ...(agentEntry?.memorySearch?.extraPaths ?? []), - ] - .filter((value): value is string => typeof value === "string") - .map((value) => value.trim()) - .filter(Boolean); - const dedupedExtraPaths = Array.from(new Set(mergedExtraPaths)); - const searchExtraPaths = dedupedExtraPaths.map( - (pathValue): { path: string; pattern?: string; name?: string } => ({ path: pathValue }), - ); - const mergedExtraCollections = [ - ...(params.cfg.agents?.defaults?.memorySearch?.qmd?.extraCollections ?? []), - ...(agentEntry?.memorySearch?.qmd?.extraCollections ?? []), - ].filter( - (value): value is MemoryQmdIndexPath => - value !== null && typeof value === "object" && typeof value.path === "string", - ); - - // Combine QMD-specific paths with extraPaths and per-agent cross-agent collections. - const allQmdPaths: MemoryQmdIndexPath[] = [ - ...(qmdCfg?.paths ?? []), - ...searchExtraPaths, - ...mergedExtraCollections, - ]; - - const collections = [ - ...resolveDefaultCollections(includeDefaultMemory, workspaceDir, nameSet, normalizedAgentId), - ...resolveCustomPaths(allQmdPaths, workspaceDir, nameSet, normalizedAgentId), - ]; - - const rawCommand = normalizeOptionalString(qmdCfg?.command) || "qmd"; - const parsedCommand = splitShellArgs(rawCommand); - const command = parsedCommand?.[0] || rawCommand.split(/\s+/)[0] || "qmd"; - const resolved: ResolvedQmdConfig = { - command, - mcporter: resolveMcporterConfig(qmdCfg?.mcporter), - searchMode: resolveSearchMode(qmdCfg?.searchMode), - searchTool: resolveSearchTool(qmdCfg?.searchTool), - collections, - includeDefaultMemory, - sessions: resolveSessionConfig(qmdCfg?.sessions, workspaceDir), - update: { - intervalMs: resolveIntervalMs(qmdCfg?.update?.interval), - debounceMs: resolveDebounceMs(qmdCfg?.update?.debounceMs), - onBoot: qmdCfg?.update?.onBoot !== false, - startup: resolveStartupMode(qmdCfg?.update), - startupDelayMs: resolveStartupDelayMs(qmdCfg?.update?.startupDelayMs), - waitForBootSync: qmdCfg?.update?.waitForBootSync === true, - embedIntervalMs: resolveEmbedIntervalMs(qmdCfg?.update?.embedInterval), - commandTimeoutMs: resolveTimeoutMs( - qmdCfg?.update?.commandTimeoutMs, - DEFAULT_QMD_COMMAND_TIMEOUT_MS, - ), - updateTimeoutMs: resolveTimeoutMs( - qmdCfg?.update?.updateTimeoutMs, - DEFAULT_QMD_UPDATE_TIMEOUT_MS, - ), - embedTimeoutMs: resolveTimeoutMs( - qmdCfg?.update?.embedTimeoutMs, - DEFAULT_QMD_EMBED_TIMEOUT_MS, - ), - }, - limits: resolveLimits(qmdCfg?.limits), - scope: qmdCfg?.scope ?? DEFAULT_QMD_SCOPE, - }; - - return { - backend: "qmd", - citations, - qmd: resolved, - }; -} +export * from "../../../packages/memory-host-sdk/src/host/backend-config.js"; diff --git a/src/memory-host-sdk/host/batch-output.ts b/src/memory-host-sdk/host/batch-output.ts index e2a75a878da..2ce39fc2461 100644 --- a/src/memory-host-sdk/host/batch-output.ts +++ b/src/memory-host-sdk/host/batch-output.ts @@ -1,55 +1 @@ -export type EmbeddingBatchOutputLine = { - custom_id?: string; - error?: { message?: string }; - response?: { - status_code?: number; - body?: - | { - data?: Array<{ - embedding?: number[]; - }>; - error?: { message?: string }; - } - | string; - }; -}; - -export function applyEmbeddingBatchOutputLine(params: { - line: EmbeddingBatchOutputLine; - remaining: Set; - errors: string[]; - byCustomId: Map; -}) { - const customId = params.line.custom_id; - if (!customId) { - return; - } - params.remaining.delete(customId); - - const errorMessage = params.line.error?.message; - if (errorMessage) { - params.errors.push(`${customId}: ${errorMessage}`); - return; - } - - const response = params.line.response; - const statusCode = response?.status_code ?? 0; - if (statusCode >= 400) { - const messageFromObject = - response?.body && typeof response.body === "object" - ? (response.body as { error?: { message?: string } }).error?.message - : undefined; - const messageFromString = typeof response?.body === "string" ? response.body : undefined; - params.errors.push(`${customId}: ${messageFromObject ?? messageFromString ?? "unknown error"}`); - return; - } - - const data = - response?.body && typeof response.body === "object" ? (response.body.data ?? []) : []; - const embedding = data[0]?.embedding ?? []; - if (embedding.length === 0) { - params.errors.push(`${customId}: empty embedding`); - return; - } - params.byCustomId.set(customId, embedding); -} +export * from "../../../packages/memory-host-sdk/src/host/batch-output.js"; diff --git a/src/memory-host-sdk/host/batch-status.ts b/src/memory-host-sdk/host/batch-status.ts index 96e8da62894..fa25fbc06b4 100644 --- a/src/memory-host-sdk/host/batch-status.ts +++ b/src/memory-host-sdk/host/batch-status.ts @@ -1,69 +1 @@ -const TERMINAL_FAILURE_STATES = new Set(["failed", "expired", "cancelled", "canceled"]); - -type BatchStatusLike = { - id?: string; - status?: string; - output_file_id?: string | null; - error_file_id?: string | null; -}; - -export type BatchCompletionResult = { - outputFileId: string; - errorFileId?: string; -}; - -export function resolveBatchCompletionFromStatus(params: { - provider: string; - batchId: string; - status: BatchStatusLike; -}): BatchCompletionResult { - if (!params.status.output_file_id) { - throw new Error(`${params.provider} batch ${params.batchId} completed without output file`); - } - return { - outputFileId: params.status.output_file_id, - errorFileId: params.status.error_file_id ?? undefined, - }; -} - -export async function throwIfBatchTerminalFailure(params: { - provider: string; - status: BatchStatusLike; - readError: (errorFileId: string) => Promise; -}): Promise { - const state = params.status.status ?? "unknown"; - if (!TERMINAL_FAILURE_STATES.has(state)) { - return; - } - const detail = params.status.error_file_id - ? await params.readError(params.status.error_file_id) - : undefined; - const suffix = detail ? `: ${detail}` : ""; - throw new Error(`${params.provider} batch ${params.status.id ?? ""} ${state}${suffix}`); -} - -export async function resolveCompletedBatchResult(params: { - provider: string; - status: BatchStatusLike; - wait: boolean; - waitForBatch: () => Promise; -}): Promise { - const batchId = params.status.id ?? ""; - if (!params.wait && params.status.status !== "completed") { - throw new Error( - `${params.provider} batch ${batchId} submitted; enable remote.batch.wait to await completion`, - ); - } - const completed = - params.status.status === "completed" - ? resolveBatchCompletionFromStatus({ - provider: params.provider, - batchId, - status: params.status, - }) - : await params.waitForBatch(); - if (!completed.outputFileId) { - throw new Error(`${params.provider} batch ${batchId} completed without output file`); - } - return completed; -} +export * from "../../../packages/memory-host-sdk/src/host/batch-status.js"; diff --git a/src/memory-host-sdk/host/embedding-input-limits.ts b/src/memory-host-sdk/host/embedding-input-limits.ts index 4eadf1bf48d..406864a58c4 100644 --- a/src/memory-host-sdk/host/embedding-input-limits.ts +++ b/src/memory-host-sdk/host/embedding-input-limits.ts @@ -1,85 +1 @@ -import type { EmbeddingInput } from "./embedding-inputs.js"; - -// Helpers for enforcing embedding model input size limits. -// -// We use UTF-8 byte length as a conservative upper bound for tokenizer output. -// Tokenizers operate over bytes; a token must contain at least one byte, so -// token_count <= utf8_byte_length. - -export function estimateUtf8Bytes(text: string): number { - if (!text) { - return 0; - } - return Buffer.byteLength(text, "utf8"); -} - -export function estimateStructuredEmbeddingInputBytes(input: EmbeddingInput): number { - if (!input.parts?.length) { - return estimateUtf8Bytes(input.text); - } - let total = 0; - for (const part of input.parts) { - if (part.type === "text") { - total += estimateUtf8Bytes(part.text); - continue; - } - total += estimateUtf8Bytes(part.mimeType); - total += estimateUtf8Bytes(part.data); - } - return total; -} - -export function splitTextToUtf8ByteLimit(text: string, maxUtf8Bytes: number): string[] { - if (maxUtf8Bytes <= 0) { - return [text]; - } - if (estimateUtf8Bytes(text) <= maxUtf8Bytes) { - return [text]; - } - - const parts: string[] = []; - let cursor = 0; - while (cursor < text.length) { - // The number of UTF-16 code units is always <= the number of UTF-8 bytes. - // This makes `cursor + maxUtf8Bytes` a safe upper bound on the next split point. - let low = cursor + 1; - let high = Math.min(text.length, cursor + maxUtf8Bytes); - let best = cursor; - - while (low <= high) { - const mid = Math.floor((low + high) / 2); - const bytes = estimateUtf8Bytes(text.slice(cursor, mid)); - if (bytes <= maxUtf8Bytes) { - best = mid; - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (best <= cursor) { - best = Math.min(text.length, cursor + 1); - } - - // Avoid splitting inside a surrogate pair. - if ( - best < text.length && - best > cursor && - text.charCodeAt(best - 1) >= 0xd800 && - text.charCodeAt(best - 1) <= 0xdbff && - text.charCodeAt(best) >= 0xdc00 && - text.charCodeAt(best) <= 0xdfff - ) { - best -= 1; - } - - const part = text.slice(cursor, best); - if (!part) { - break; - } - parts.push(part); - cursor = best; - } - - return parts; -} +export * from "../../../packages/memory-host-sdk/src/host/embedding-input-limits.js"; diff --git a/src/memory-host-sdk/host/embeddings-remote-provider.ts b/src/memory-host-sdk/host/embeddings-remote-provider.ts index d416349da48..2be2fb5f9e0 100644 --- a/src/memory-host-sdk/host/embeddings-remote-provider.ts +++ b/src/memory-host-sdk/host/embeddings-remote-provider.ts @@ -1,65 +1 @@ -import type { SsrFPolicy } from "../../infra/net/ssrf.js"; -import { - resolveRemoteEmbeddingBearerClient, - type RemoteEmbeddingProviderId, -} from "./embeddings-remote-client.js"; -import { fetchRemoteEmbeddingVectors } from "./embeddings-remote-fetch.js"; -import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.types.js"; - -export type RemoteEmbeddingClient = { - baseUrl: string; - headers: Record; - ssrfPolicy?: SsrFPolicy; - fetchImpl?: typeof fetch; - model: string; -}; - -export function createRemoteEmbeddingProvider(params: { - id: string; - client: RemoteEmbeddingClient; - errorPrefix: string; - maxInputTokens?: number; -}): EmbeddingProvider { - const { client } = params; - const url = `${client.baseUrl.replace(/\/$/, "")}/embeddings`; - - const embed = async (input: string[]): Promise => { - if (input.length === 0) { - return []; - } - return await fetchRemoteEmbeddingVectors({ - url, - headers: client.headers, - ssrfPolicy: client.ssrfPolicy, - fetchImpl: client.fetchImpl, - body: { model: client.model, input }, - errorPrefix: params.errorPrefix, - }); - }; - - return { - id: params.id, - model: client.model, - ...(typeof params.maxInputTokens === "number" ? { maxInputTokens: params.maxInputTokens } : {}), - embedQuery: async (text) => { - const [vec] = await embed([text]); - return vec ?? []; - }, - embedBatch: embed, - }; -} - -export async function resolveRemoteEmbeddingClient(params: { - provider: RemoteEmbeddingProviderId; - options: EmbeddingProviderOptions; - defaultBaseUrl: string; - normalizeModel: (model: string) => string; -}): Promise { - const { baseUrl, headers, ssrfPolicy } = await resolveRemoteEmbeddingBearerClient({ - provider: params.provider, - options: params.options, - defaultBaseUrl: params.defaultBaseUrl, - }); - const model = params.normalizeModel(params.options.model); - return { baseUrl, headers, ssrfPolicy, model }; -} +export * from "../../../packages/memory-host-sdk/src/host/embeddings-remote-provider.js"; diff --git a/src/memory-host-sdk/host/internal.ts b/src/memory-host-sdk/host/internal.ts index a84e18bcb06..12d27c15f3e 100644 --- a/src/memory-host-sdk/host/internal.ts +++ b/src/memory-host-sdk/host/internal.ts @@ -1,525 +1 @@ -import crypto from "node:crypto"; -import fsSync from "node:fs"; -import fs from "node:fs/promises"; -import path from "node:path"; -import { detectMime } from "../../media/mime.js"; -import { - CANONICAL_ROOT_MEMORY_FILENAME, - resolveCanonicalRootMemoryFile, - shouldSkipRootMemoryAuxiliaryPath, -} from "../../memory/root-memory-files.js"; -import { CHARS_PER_TOKEN_ESTIMATE, estimateStringChars } from "../../utils/cjk-chars.js"; -import { runTasksWithConcurrency } from "../../utils/run-with-concurrency.js"; -import { estimateStructuredEmbeddingInputBytes } from "./embedding-input-limits.js"; -import { buildTextEmbeddingInput, type EmbeddingInput } from "./embedding-inputs.js"; -import { isFileMissingError } from "./fs-utils.js"; -import { - buildMemoryMultimodalLabel, - classifyMemoryMultimodalPath, - type MemoryMultimodalModality, - type MemoryMultimodalSettings, -} from "./multimodal.js"; - -export { hashText } from "./hash.js"; -import { hashText } from "./hash.js"; - -export type MemoryFileEntry = { - path: string; - absPath: string; - mtimeMs: number; - size: number; - hash: string; - dataHash?: string; - kind?: "markdown" | "multimodal"; - contentText?: string; - modality?: MemoryMultimodalModality; - mimeType?: string; -}; - -export type MemoryChunk = { - startLine: number; - endLine: number; - text: string; - hash: string; - embeddingInput?: EmbeddingInput; -}; - -export type MultimodalMemoryChunk = { - chunk: MemoryChunk; - structuredInputBytes: number; -}; - -const DISABLED_MULTIMODAL_SETTINGS: MemoryMultimodalSettings = { - enabled: false, - modalities: [], - maxFileBytes: 0, -}; - -export function ensureDir(dir: string): string { - try { - fsSync.mkdirSync(dir, { recursive: true }); - } catch {} - return dir; -} - -export function normalizeRelPath(value: string): string { - const trimmed = value.trim().replace(/^[./]+/, ""); - return trimmed.replace(/\\/g, "/"); -} - -export function normalizeExtraMemoryPaths(workspaceDir: string, extraPaths?: string[]): string[] { - if (!extraPaths?.length) { - return []; - } - const resolved = extraPaths - .map((value) => value.trim()) - .filter(Boolean) - .map((value) => - path.isAbsolute(value) ? path.resolve(value) : path.resolve(workspaceDir, value), - ); - return Array.from(new Set(resolved)); -} - -export function isMemoryPath(relPath: string): boolean { - const normalized = normalizeRelPath(relPath); - if (!normalized) { - return false; - } - if (normalized === CANONICAL_ROOT_MEMORY_FILENAME || normalized === "DREAMS.md") { - return true; - } - return normalized.startsWith("memory/"); -} - -function isAllowedMemoryFilePath(filePath: string, multimodal?: MemoryMultimodalSettings): boolean { - if (filePath.endsWith(".md")) { - return true; - } - return ( - classifyMemoryMultimodalPath(filePath, multimodal ?? DISABLED_MULTIMODAL_SETTINGS) !== null - ); -} - -async function walkDir( - dir: string, - files: string[], - multimodal?: MemoryMultimodalSettings, - shouldSkipPath?: (absPath: string) => boolean, -) { - const entries = await fs.readdir(dir, { withFileTypes: true }); - for (const entry of entries) { - const full = path.join(dir, entry.name); - if (shouldSkipPath?.(full)) { - continue; - } - if (entry.isSymbolicLink()) { - continue; - } - if (entry.isDirectory()) { - if (entry.name === ".openclaw-repair") { - continue; - } - await walkDir(full, files, multimodal, shouldSkipPath); - continue; - } - if (!entry.isFile()) { - continue; - } - if (!isAllowedMemoryFilePath(full, multimodal)) { - continue; - } - files.push(full); - } -} - -export async function listMemoryFiles( - workspaceDir: string, - extraPaths?: string[], - multimodal?: MemoryMultimodalSettings, -): Promise { - const result: string[] = []; - const memoryDir = path.join(workspaceDir, "memory"); - - const shouldSkipWorkspaceMemoryPath = (absPath: string): boolean => - shouldSkipRootMemoryAuxiliaryPath({ workspaceDir, absPath }); - - const addMarkdownFile = async (absPath: string) => { - try { - const stat = await fs.lstat(absPath); - if (stat.isSymbolicLink() || !stat.isFile()) { - return; - } - if (!absPath.endsWith(".md")) { - return; - } - result.push(absPath); - } catch {} - }; - - const memoryFile = await resolveCanonicalRootMemoryFile(workspaceDir); - if (memoryFile) { - await addMarkdownFile(memoryFile); - } - try { - const dirStat = await fs.lstat(memoryDir); - if (!dirStat.isSymbolicLink() && dirStat.isDirectory()) { - await walkDir(memoryDir, result, multimodal, shouldSkipWorkspaceMemoryPath); - } - } catch {} - - const normalizedExtraPaths = normalizeExtraMemoryPaths(workspaceDir, extraPaths); - if (normalizedExtraPaths.length > 0) { - for (const inputPath of normalizedExtraPaths) { - if (shouldSkipWorkspaceMemoryPath(inputPath)) { - continue; - } - try { - const stat = await fs.lstat(inputPath); - if (stat.isSymbolicLink()) { - continue; - } - if (stat.isDirectory()) { - await walkDir(inputPath, result, multimodal, shouldSkipWorkspaceMemoryPath); - continue; - } - if (stat.isFile() && isAllowedMemoryFilePath(inputPath, multimodal)) { - result.push(inputPath); - } - } catch {} - } - } - if (result.length <= 1) { - return result; - } - const seen = new Set(); - const deduped: string[] = []; - for (const entry of result) { - let key = entry; - try { - key = await fs.realpath(entry); - } catch {} - if (seen.has(key)) { - continue; - } - seen.add(key); - deduped.push(entry); - } - return deduped; -} - -export async function buildFileEntry( - absPath: string, - workspaceDir: string, - multimodal?: MemoryMultimodalSettings, -): Promise { - let stat; - try { - stat = await fs.stat(absPath); - } catch (err) { - if (isFileMissingError(err)) { - return null; - } - throw err; - } - const normalizedPath = path.relative(workspaceDir, absPath).replace(/\\/g, "/"); - const multimodalSettings = multimodal ?? DISABLED_MULTIMODAL_SETTINGS; - const modality = classifyMemoryMultimodalPath(absPath, multimodalSettings); - if (modality) { - if (stat.size > multimodalSettings.maxFileBytes) { - return null; - } - let buffer: Buffer; - try { - buffer = await fs.readFile(absPath); - } catch (err) { - if (isFileMissingError(err)) { - return null; - } - throw err; - } - const mimeType = await detectMime({ buffer: buffer.subarray(0, 512), filePath: absPath }); - if (!mimeType || !mimeType.startsWith(`${modality}/`)) { - return null; - } - const contentText = buildMemoryMultimodalLabel(modality, normalizedPath); - const dataHash = crypto.createHash("sha256").update(buffer).digest("hex"); - const chunkHash = hashText( - JSON.stringify({ - path: normalizedPath, - contentText, - mimeType, - dataHash, - }), - ); - return { - path: normalizedPath, - absPath, - mtimeMs: stat.mtimeMs, - size: stat.size, - hash: chunkHash, - dataHash, - kind: "multimodal", - contentText, - modality, - mimeType, - }; - } - let content: string; - try { - content = await fs.readFile(absPath, "utf-8"); - } catch (err) { - if (isFileMissingError(err)) { - return null; - } - throw err; - } - const hash = hashText(content); - return { - path: normalizedPath, - absPath, - mtimeMs: stat.mtimeMs, - size: stat.size, - hash, - kind: "markdown", - }; -} - -async function loadMultimodalEmbeddingInput( - entry: Pick< - MemoryFileEntry, - "absPath" | "contentText" | "mimeType" | "kind" | "size" | "dataHash" - >, -): Promise { - if (entry.kind !== "multimodal" || !entry.contentText || !entry.mimeType) { - return null; - } - let stat; - try { - stat = await fs.stat(entry.absPath); - } catch (err) { - if (isFileMissingError(err)) { - return null; - } - throw err; - } - if (stat.size !== entry.size) { - return null; - } - let buffer: Buffer; - try { - buffer = await fs.readFile(entry.absPath); - } catch (err) { - if (isFileMissingError(err)) { - return null; - } - throw err; - } - const dataHash = crypto.createHash("sha256").update(buffer).digest("hex"); - if (entry.dataHash && entry.dataHash !== dataHash) { - return null; - } - return { - text: entry.contentText, - parts: [ - { type: "text", text: entry.contentText }, - { - type: "inline-data", - mimeType: entry.mimeType, - data: buffer.toString("base64"), - }, - ], - }; -} - -export async function buildMultimodalChunkForIndexing( - entry: Pick< - MemoryFileEntry, - "absPath" | "contentText" | "mimeType" | "kind" | "hash" | "size" | "dataHash" - >, -): Promise { - const embeddingInput = await loadMultimodalEmbeddingInput(entry); - if (!embeddingInput) { - return null; - } - return { - chunk: { - startLine: 1, - endLine: 1, - text: entry.contentText ?? embeddingInput.text, - hash: entry.hash, - embeddingInput, - }, - structuredInputBytes: estimateStructuredEmbeddingInputBytes(embeddingInput), - }; -} - -export function chunkMarkdown( - content: string, - chunking: { tokens: number; overlap: number }, -): MemoryChunk[] { - const lines = content.split("\n"); - if (lines.length === 0) { - return []; - } - const maxChars = Math.max(32, chunking.tokens * CHARS_PER_TOKEN_ESTIMATE); - const overlapChars = Math.max(0, chunking.overlap * CHARS_PER_TOKEN_ESTIMATE); - const chunks: MemoryChunk[] = []; - - let current: Array<{ line: string; lineNo: number }> = []; - let currentChars = 0; - - const flush = () => { - if (current.length === 0) { - return; - } - const firstEntry = current[0]; - const lastEntry = current[current.length - 1]; - if (!firstEntry || !lastEntry) { - return; - } - const text = current.map((entry) => entry.line).join("\n"); - const startLine = firstEntry.lineNo; - const endLine = lastEntry.lineNo; - chunks.push({ - startLine, - endLine, - text, - hash: hashText(text), - embeddingInput: buildTextEmbeddingInput(text), - }); - }; - - const carryOverlap = () => { - if (overlapChars <= 0 || current.length === 0) { - current = []; - currentChars = 0; - return; - } - let acc = 0; - const kept: Array<{ line: string; lineNo: number }> = []; - for (let i = current.length - 1; i >= 0; i -= 1) { - const entry = current[i]; - if (!entry) { - continue; - } - acc += estimateStringChars(entry.line) + 1; - kept.unshift(entry); - if (acc >= overlapChars) { - break; - } - } - current = kept; - currentChars = acc; - }; - - for (let i = 0; i < lines.length; i += 1) { - const line = lines[i] ?? ""; - const lineNo = i + 1; - const segments: string[] = []; - if (line.length === 0) { - segments.push(""); - } else { - // First pass: slice at maxChars (preserves original behaviour for Latin). - // Second pass: if a segment's *weighted* size still exceeds the budget - // (happens for CJK-heavy text where 1 char ≈ 1 token), re-split it at - // chunking.tokens so the chunk stays within the token budget. - for (let start = 0; start < line.length; start += maxChars) { - const coarse = line.slice(start, start + maxChars); - if (estimateStringChars(coarse) > maxChars) { - const fineStep = Math.max(1, chunking.tokens); - for (let j = 0; j < coarse.length; ) { - let end = Math.min(j + fineStep, coarse.length); - // Avoid splitting inside a UTF-16 surrogate pair (CJK Extension B+). - if (end < coarse.length) { - const code = coarse.charCodeAt(end - 1); - if (code >= 0xd800 && code <= 0xdbff) { - end += 1; // include the low surrogate - } - } - segments.push(coarse.slice(j, end)); - j = end; // advance cursor to the adjusted boundary - } - } else { - segments.push(coarse); - } - } - } - for (const segment of segments) { - const lineSize = estimateStringChars(segment) + 1; - if (currentChars + lineSize > maxChars && current.length > 0) { - flush(); - carryOverlap(); - } - current.push({ line: segment, lineNo }); - currentChars += lineSize; - } - } - flush(); - return chunks; -} - -/** - * Remap chunk startLine/endLine from content-relative positions to original - * source file positions using a lineMap. Each entry in lineMap gives the - * 1-indexed source line for the corresponding 0-indexed content line. - * - * This is used for session JSONL files where buildSessionEntry() flattens - * messages into a plain-text string before chunking. Without remapping the - * stored line numbers would reference positions in the flattened text rather - * than the original JSONL file. - */ -export function remapChunkLines(chunks: MemoryChunk[], lineMap: number[] | undefined): void { - if (!lineMap || lineMap.length === 0) { - return; - } - for (const chunk of chunks) { - // startLine/endLine are 1-indexed; lineMap is 0-indexed by content line - chunk.startLine = lineMap[chunk.startLine - 1] ?? chunk.startLine; - chunk.endLine = lineMap[chunk.endLine - 1] ?? chunk.endLine; - } -} - -export function parseEmbedding(raw: string): number[] { - try { - const parsed = JSON.parse(raw) as number[]; - return Array.isArray(parsed) ? parsed : []; - } catch { - return []; - } -} - -export function cosineSimilarity(a: number[], b: number[]): number { - if (a.length === 0 || b.length === 0) { - return 0; - } - const len = Math.min(a.length, b.length); - let dot = 0; - let normA = 0; - let normB = 0; - for (let i = 0; i < len; i += 1) { - const av = a[i] ?? 0; - const bv = b[i] ?? 0; - dot += av * bv; - normA += av * av; - normB += bv * bv; - } - if (normA === 0 || normB === 0) { - return 0; - } - return dot / (Math.sqrt(normA) * Math.sqrt(normB)); -} - -export async function runWithConcurrency( - tasks: Array<() => Promise>, - limit: number, -): Promise { - const { results, firstError, hasError } = await runTasksWithConcurrency({ - tasks, - limit, - errorMode: "stop", - }); - if (hasError) { - throw firstError; - } - return results; -} +export * from "../../../packages/memory-host-sdk/src/host/internal.js"; diff --git a/src/memory-host-sdk/host/memory-schema.ts b/src/memory-host-sdk/host/memory-schema.ts index 7287b7f035f..5264d758218 100644 --- a/src/memory-host-sdk/host/memory-schema.ts +++ b/src/memory-host-sdk/host/memory-schema.ts @@ -1,103 +1 @@ -import type { DatabaseSync } from "node:sqlite"; -import { formatErrorMessage } from "../../infra/errors.js"; - -export function ensureMemoryIndexSchema(params: { - db: DatabaseSync; - embeddingCacheTable: string; - cacheEnabled: boolean; - ftsTable: string; - ftsEnabled: boolean; - ftsTokenizer?: "unicode61" | "trigram"; -}): { ftsAvailable: boolean; ftsError?: string } { - params.db.exec(` - CREATE TABLE IF NOT EXISTS meta ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL - ); - `); - params.db.exec(` - CREATE TABLE IF NOT EXISTS files ( - path TEXT PRIMARY KEY, - source TEXT NOT NULL DEFAULT 'memory', - hash TEXT NOT NULL, - mtime INTEGER NOT NULL, - size INTEGER NOT NULL - ); - `); - params.db.exec(` - CREATE TABLE IF NOT EXISTS chunks ( - id TEXT PRIMARY KEY, - path TEXT NOT NULL, - source TEXT NOT NULL DEFAULT 'memory', - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL, - hash TEXT NOT NULL, - model TEXT NOT NULL, - text TEXT NOT NULL, - embedding TEXT NOT NULL, - updated_at INTEGER NOT NULL - ); - `); - if (params.cacheEnabled) { - params.db.exec(` - CREATE TABLE IF NOT EXISTS ${params.embeddingCacheTable} ( - provider TEXT NOT NULL, - model TEXT NOT NULL, - provider_key TEXT NOT NULL, - hash TEXT NOT NULL, - embedding TEXT NOT NULL, - dims INTEGER, - updated_at INTEGER NOT NULL, - PRIMARY KEY (provider, model, provider_key, hash) - ); - `); - params.db.exec( - `CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${params.embeddingCacheTable}(updated_at);`, - ); - } - - let ftsAvailable = false; - let ftsError: string | undefined; - if (params.ftsEnabled) { - try { - const tokenizer = params.ftsTokenizer ?? "unicode61"; - const tokenizeClause = tokenizer === "trigram" ? `, tokenize='trigram case_sensitive 0'` : ""; - params.db.exec( - `CREATE VIRTUAL TABLE IF NOT EXISTS ${params.ftsTable} USING fts5(\n` + - ` text,\n` + - ` id UNINDEXED,\n` + - ` path UNINDEXED,\n` + - ` source UNINDEXED,\n` + - ` model UNINDEXED,\n` + - ` start_line UNINDEXED,\n` + - ` end_line UNINDEXED\n` + - `${tokenizeClause});`, - ); - ftsAvailable = true; - } catch (err) { - const message = formatErrorMessage(err); - ftsAvailable = false; - ftsError = message; - } - } - - ensureColumn(params.db, "files", "source", "TEXT NOT NULL DEFAULT 'memory'"); - ensureColumn(params.db, "chunks", "source", "TEXT NOT NULL DEFAULT 'memory'"); - params.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`); - params.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source);`); - - return { ftsAvailable, ...(ftsError ? { ftsError } : {}) }; -} - -function ensureColumn( - db: DatabaseSync, - table: "files" | "chunks", - column: string, - definition: string, -): void { - const rows = db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>; - if (rows.some((row) => row.name === column)) { - return; - } - db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`); -} +export * from "../../../packages/memory-host-sdk/src/host/memory-schema.js"; diff --git a/src/memory-host-sdk/host/mirror.test.ts b/src/memory-host-sdk/host/mirror.test.ts index 918b6d85a4d..4b1dc385c56 100644 --- a/src/memory-host-sdk/host/mirror.test.ts +++ b/src/memory-host-sdk/host/mirror.test.ts @@ -4,22 +4,37 @@ import { fileURLToPath } from "node:url"; import { describe, expect, it } from "vitest"; const HOST_DIR = path.dirname(fileURLToPath(import.meta.url)); -const REPO_ROOT = path.resolve(HOST_DIR, "../../.."); -const PACKAGE_HOST_DIR = path.join(REPO_ROOT, "packages/memory-host-sdk/src/host"); -const PACKAGE_COVERED_MIRRORS = [ +const PACKAGE_BRIDGE_FILES = [ + "backend-config.ts", + "batch-error-utils.ts", "batch-output.ts", "batch-status.ts", - "embedding-chunk-limits.ts", - "embeddings-model-normalize.ts", + "embedding-input-limits.ts", + "embeddings-remote-provider.ts", + "embeddings.ts", + "internal.ts", + "memory-schema.ts", + "multimodal.ts", + "qmd-process.ts", + "qmd-scope.ts", + "query-expansion.ts", + "read-file-shared.ts", + "read-file.ts", + "session-files.ts", + "types.ts", ] as const; -describe("memory-host-sdk mirrored host modules", () => { - it("keeps package-covered source mirrors byte-identical", () => { - for (const fileName of PACKAGE_COVERED_MIRRORS) { - const srcSource = fs.readFileSync(path.join(HOST_DIR, fileName), "utf8"); - const packageSource = fs.readFileSync(path.join(PACKAGE_HOST_DIR, fileName), "utf8"); - expect(srcSource, fileName).toBe(packageSource); +describe("memory-host-sdk host package bridges", () => { + it("keeps package-owned source bridges thin", () => { + for (const fileName of PACKAGE_BRIDGE_FILES) { + const source = fs.readFileSync(path.join(HOST_DIR, fileName), "utf8"); + expect(source, fileName).toBe( + `export * from "../../../packages/memory-host-sdk/src/host/${fileName.replace( + /\.ts$/u, + ".js", + )}";\n`, + ); } }); }); diff --git a/src/memory-host-sdk/host/multimodal.ts b/src/memory-host-sdk/host/multimodal.ts index 3ad16b9d414..200f20a715b 100644 --- a/src/memory-host-sdk/host/multimodal.ts +++ b/src/memory-host-sdk/host/multimodal.ts @@ -1,108 +1 @@ -import { - lowercasePreservingWhitespace, - normalizeLowercaseStringOrEmpty, -} from "../../shared/string-coerce.js"; - -const MEMORY_MULTIMODAL_SPECS = { - image: { - labelPrefix: "Image file", - extensions: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".heic", ".heif"], - }, - audio: { - labelPrefix: "Audio file", - extensions: [".mp3", ".wav", ".ogg", ".opus", ".m4a", ".aac", ".flac"], - }, -} as const; - -export type MemoryMultimodalModality = keyof typeof MEMORY_MULTIMODAL_SPECS; -export const MEMORY_MULTIMODAL_MODALITIES = Object.keys( - MEMORY_MULTIMODAL_SPECS, -) as MemoryMultimodalModality[]; -export type MemoryMultimodalSelection = MemoryMultimodalModality | "all"; - -export type MemoryMultimodalSettings = { - enabled: boolean; - modalities: MemoryMultimodalModality[]; - maxFileBytes: number; -}; - -export const DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES = 10 * 1024 * 1024; - -export function normalizeMemoryMultimodalModalities( - raw: MemoryMultimodalSelection[] | undefined, -): MemoryMultimodalModality[] { - if (raw === undefined || raw.includes("all")) { - return [...MEMORY_MULTIMODAL_MODALITIES]; - } - const normalized = new Set(); - for (const value of raw) { - if (value === "image" || value === "audio") { - normalized.add(value); - } - } - return Array.from(normalized); -} - -export function normalizeMemoryMultimodalSettings(raw: { - enabled?: boolean; - modalities?: MemoryMultimodalSelection[]; - maxFileBytes?: number; -}): MemoryMultimodalSettings { - const enabled = raw.enabled === true; - const maxFileBytes = - typeof raw.maxFileBytes === "number" && Number.isFinite(raw.maxFileBytes) - ? Math.max(1, Math.floor(raw.maxFileBytes)) - : DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES; - return { - enabled, - modalities: enabled ? normalizeMemoryMultimodalModalities(raw.modalities) : [], - maxFileBytes, - }; -} - -export function isMemoryMultimodalEnabled(settings: MemoryMultimodalSettings): boolean { - return settings.enabled && settings.modalities.length > 0; -} - -export function getMemoryMultimodalExtensions( - modality: MemoryMultimodalModality, -): readonly string[] { - return MEMORY_MULTIMODAL_SPECS[modality].extensions; -} - -export function buildMemoryMultimodalLabel( - modality: MemoryMultimodalModality, - normalizedPath: string, -): string { - return `${MEMORY_MULTIMODAL_SPECS[modality].labelPrefix}: ${normalizedPath}`; -} - -export function buildCaseInsensitiveExtensionGlob(extension: string): string { - const normalized = normalizeLowercaseStringOrEmpty(extension).replace(/^\./, ""); - if (!normalized) { - return "*"; - } - const parts = Array.from(normalized, (char) => { - const lower = lowercasePreservingWhitespace(char); - return `[${lower}${char.toUpperCase()}]`; - }); - return `*.${parts.join("")}`; -} - -export function classifyMemoryMultimodalPath( - filePath: string, - settings: MemoryMultimodalSettings, -): MemoryMultimodalModality | null { - if (!isMemoryMultimodalEnabled(settings)) { - return null; - } - const lower = normalizeLowercaseStringOrEmpty(filePath); - for (const modality of settings.modalities) { - for (const extension of getMemoryMultimodalExtensions(modality)) { - if (lower.endsWith(extension)) { - return modality; - } - } - } - return null; -} +export * from "../../../packages/memory-host-sdk/src/host/multimodal.js"; diff --git a/src/memory-host-sdk/host/qmd-process.ts b/src/memory-host-sdk/host/qmd-process.ts index eeccae9654e..127233da6b9 100644 --- a/src/memory-host-sdk/host/qmd-process.ts +++ b/src/memory-host-sdk/host/qmd-process.ts @@ -1,184 +1 @@ -import { spawn } from "node:child_process"; -import { - materializeWindowsSpawnProgram, - resolveWindowsSpawnProgram, -} from "../../plugin-sdk/windows-spawn.js"; - -export type CliSpawnInvocation = { - command: string; - argv: string[]; - shell?: boolean; - windowsHide?: boolean; -}; - -export type QmdBinaryAvailability = { - available: boolean; - error?: string; -}; - -export function resolveCliSpawnInvocation(params: { - command: string; - args: string[]; - env: NodeJS.ProcessEnv; - packageName: string; -}): CliSpawnInvocation { - const program = resolveWindowsSpawnProgram({ - command: params.command, - platform: process.platform, - env: params.env, - execPath: process.execPath, - packageName: params.packageName, - allowShellFallback: false, - }); - return materializeWindowsSpawnProgram(program, params.args); -} - -export async function checkQmdBinaryAvailability(params: { - command: string; - env: NodeJS.ProcessEnv; - cwd?: string; - timeoutMs?: number; -}): Promise { - let spawnInvocation: CliSpawnInvocation; - try { - spawnInvocation = resolveCliSpawnInvocation({ - command: params.command, - args: [], - env: params.env, - packageName: "qmd", - }); - } catch (err) { - return { available: false, error: formatQmdAvailabilityError(err) }; - } - - return await new Promise((resolve) => { - let settled = false; - let didSpawn = false; - const finish = (result: QmdBinaryAvailability) => { - if (settled) { - return; - } - settled = true; - if (timer) { - clearTimeout(timer); - } - resolve(result); - }; - - const child = spawn(spawnInvocation.command, spawnInvocation.argv, { - env: params.env, - cwd: params.cwd ?? process.cwd(), - shell: spawnInvocation.shell, - windowsHide: spawnInvocation.windowsHide, - stdio: "ignore", - }); - const timer = setTimeout(() => { - child.kill("SIGKILL"); - finish({ - available: false, - error: `spawn ${params.command} timed out after ${params.timeoutMs ?? 2_000}ms`, - }); - }, params.timeoutMs ?? 2_000); - - child.once("error", (err) => { - finish({ available: false, error: formatQmdAvailabilityError(err) }); - }); - child.once("spawn", () => { - didSpawn = true; - child.kill(); - finish({ available: true }); - }); - child.once("close", () => { - if (!didSpawn) { - return; - } - finish({ available: true }); - }); - }); -} - -export async function runCliCommand(params: { - commandSummary: string; - spawnInvocation: CliSpawnInvocation; - env: NodeJS.ProcessEnv; - cwd: string; - timeoutMs?: number; - maxOutputChars: number; - discardStdout?: boolean; -}): Promise<{ stdout: string; stderr: string }> { - return await new Promise((resolve, reject) => { - const child = spawn(params.spawnInvocation.command, params.spawnInvocation.argv, { - env: params.env, - cwd: params.cwd, - shell: params.spawnInvocation.shell, - windowsHide: params.spawnInvocation.windowsHide, - }); - let stdout = ""; - let stderr = ""; - let stdoutTruncated = false; - let stderrTruncated = false; - const discardStdout = params.discardStdout === true; - const timer = params.timeoutMs - ? setTimeout(() => { - child.kill("SIGKILL"); - reject(new Error(`${params.commandSummary} timed out after ${params.timeoutMs}ms`)); - }, params.timeoutMs) - : null; - child.stdout.on("data", (data) => { - if (discardStdout) { - return; - } - const next = appendOutputWithCap(stdout, data.toString("utf8"), params.maxOutputChars); - stdout = next.text; - stdoutTruncated = stdoutTruncated || next.truncated; - }); - child.stderr.on("data", (data) => { - const next = appendOutputWithCap(stderr, data.toString("utf8"), params.maxOutputChars); - stderr = next.text; - stderrTruncated = stderrTruncated || next.truncated; - }); - child.on("error", (err) => { - if (timer) { - clearTimeout(timer); - } - reject(err); - }); - child.on("close", (code) => { - if (timer) { - clearTimeout(timer); - } - if (!discardStdout && (stdoutTruncated || stderrTruncated)) { - reject( - new Error( - `${params.commandSummary} produced too much output (limit ${params.maxOutputChars} chars)`, - ), - ); - return; - } - if (code === 0) { - resolve({ stdout, stderr }); - } else { - reject(new Error(`${params.commandSummary} failed (code ${code}): ${stderr || stdout}`)); - } - }); - }); -} - -function appendOutputWithCap( - current: string, - chunk: string, - maxChars: number, -): { text: string; truncated: boolean } { - const appended = current + chunk; - if (appended.length <= maxChars) { - return { text: appended, truncated: false }; - } - return { text: appended.slice(-maxChars), truncated: true }; -} - -function formatQmdAvailabilityError(err: unknown): string { - if (err instanceof Error && err.message) { - return err.message; - } - return String(err); -} +export * from "../../../packages/memory-host-sdk/src/host/qmd-process.js"; diff --git a/src/memory-host-sdk/host/qmd-scope.ts b/src/memory-host-sdk/host/qmd-scope.ts index 3b2a350d2bd..22ed5005a14 100644 --- a/src/memory-host-sdk/host/qmd-scope.ts +++ b/src/memory-host-sdk/host/qmd-scope.ts @@ -1,110 +1 @@ -import { parseAgentSessionKey } from "../../sessions/session-key-utils.js"; -import { - normalizeLowercaseStringOrEmpty, - normalizeOptionalLowercaseString, -} from "../../shared/string-coerce.js"; -import type { ResolvedQmdConfig } from "./backend-config.js"; - -type ParsedQmdSessionScope = { - channel?: string; - chatType?: "channel" | "group" | "direct"; - normalizedKey?: string; -}; - -export function isQmdScopeAllowed(scope: ResolvedQmdConfig["scope"], sessionKey?: string): boolean { - if (!scope) { - return true; - } - const parsed = parseQmdSessionScope(sessionKey); - const channel = parsed.channel; - const chatType = parsed.chatType; - const normalizedKey = parsed.normalizedKey ?? ""; - const rawKey = normalizeLowercaseStringOrEmpty(sessionKey); - for (const rule of scope.rules ?? []) { - if (!rule) { - continue; - } - const match = rule.match ?? {}; - if (match.channel && match.channel !== channel) { - continue; - } - if (match.chatType && match.chatType !== chatType) { - continue; - } - const normalizedPrefix = normalizeOptionalLowercaseString(match.keyPrefix); - const rawPrefix = normalizeOptionalLowercaseString(match.rawKeyPrefix); - - if (rawPrefix && !rawKey.startsWith(rawPrefix)) { - continue; - } - if (normalizedPrefix) { - // Backward compat: older configs used `keyPrefix: "agent::..."` to match raw keys. - const isLegacyRaw = normalizedPrefix.startsWith("agent:"); - if (isLegacyRaw) { - if (!rawKey.startsWith(normalizedPrefix)) { - continue; - } - } else if (!normalizedKey.startsWith(normalizedPrefix)) { - continue; - } - } - return rule.action === "allow"; - } - const fallback = scope.default ?? "allow"; - return fallback === "allow"; -} - -export function deriveQmdScopeChannel(key?: string): string | undefined { - return parseQmdSessionScope(key).channel; -} - -export function deriveQmdScopeChatType(key?: string): "channel" | "group" | "direct" | undefined { - return parseQmdSessionScope(key).chatType; -} - -function parseQmdSessionScope(key?: string): ParsedQmdSessionScope { - const normalized = normalizeQmdSessionKey(key); - if (!normalized) { - return {}; - } - const parts = normalized.split(":").filter(Boolean); - let chatType: ParsedQmdSessionScope["chatType"]; - if ( - parts.length >= 2 && - (parts[1] === "group" || parts[1] === "channel" || parts[1] === "direct" || parts[1] === "dm") - ) { - if (parts.includes("group")) { - chatType = "group"; - } else if (parts.includes("channel")) { - chatType = "channel"; - } - return { - normalizedKey: normalized, - channel: normalizeOptionalLowercaseString(parts[0]), - chatType: chatType ?? "direct", - }; - } - if (normalized.includes(":group:")) { - return { normalizedKey: normalized, chatType: "group" }; - } - if (normalized.includes(":channel:")) { - return { normalizedKey: normalized, chatType: "channel" }; - } - return { normalizedKey: normalized, chatType: "direct" }; -} - -function normalizeQmdSessionKey(key?: string): string | undefined { - if (!key) { - return undefined; - } - const trimmed = key.trim(); - if (!trimmed) { - return undefined; - } - const parsed = parseAgentSessionKey(trimmed); - const normalized = normalizeLowercaseStringOrEmpty(parsed?.rest ?? trimmed); - if (normalized.startsWith("subagent:")) { - return undefined; - } - return normalized; -} +export * from "../../../packages/memory-host-sdk/src/host/qmd-scope.js"; diff --git a/src/memory-host-sdk/host/query-expansion.ts b/src/memory-host-sdk/host/query-expansion.ts index 161f89ea59e..2c941262724 100644 --- a/src/memory-host-sdk/host/query-expansion.ts +++ b/src/memory-host-sdk/host/query-expansion.ts @@ -1,830 +1 @@ -/** - * Query expansion for FTS-only search mode. - * - * When no embedding provider is available, we fall back to FTS (full-text search). - * FTS works best with specific keywords, but users often ask conversational queries - * like "that thing we discussed yesterday" or "之前讨论的那个方案". - * - * This module extracts meaningful keywords from such queries to improve FTS results. - */ - -import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js"; - -// Common stop words that don't add search value -const STOP_WORDS_EN = new Set([ - // Articles and determiners - "a", - "an", - "the", - "this", - "that", - "these", - "those", - // Pronouns - "i", - "me", - "my", - "we", - "our", - "you", - "your", - "he", - "she", - "it", - "they", - "them", - // Common verbs - "is", - "are", - "was", - "were", - "be", - "been", - "being", - "have", - "has", - "had", - "do", - "does", - "did", - "will", - "would", - "could", - "should", - "can", - "may", - "might", - // Prepositions - "in", - "on", - "at", - "to", - "for", - "of", - "with", - "by", - "from", - "about", - "into", - "through", - "during", - "before", - "after", - "above", - "below", - "between", - "under", - "over", - // Conjunctions - "and", - "or", - "but", - "if", - "then", - "because", - "as", - "while", - "when", - "where", - "what", - "which", - "who", - "how", - "why", - // Time references (vague, not useful for FTS) - "yesterday", - "today", - "tomorrow", - "earlier", - "later", - "recently", - "before", - "ago", - "just", - "now", - // Vague references - "thing", - "things", - "stuff", - "something", - "anything", - "everything", - "nothing", - // Question words - "please", - "help", - "find", - "show", - "get", - "tell", - "give", -]); - -const STOP_WORDS_ES = new Set([ - // Articles and determiners - "el", - "la", - "los", - "las", - "un", - "una", - "unos", - "unas", - "este", - "esta", - "ese", - "esa", - // Pronouns - "yo", - "me", - "mi", - "nosotros", - "nosotras", - "tu", - "tus", - "usted", - "ustedes", - "ellos", - "ellas", - // Prepositions and conjunctions - "de", - "del", - "a", - "en", - "con", - "por", - "para", - "sobre", - "entre", - "y", - "o", - "pero", - "si", - "porque", - "como", - // Common verbs / auxiliaries - "es", - "son", - "fue", - "fueron", - "ser", - "estar", - "haber", - "tener", - "hacer", - // Time references (vague) - "ayer", - "hoy", - "mañana", - "antes", - "despues", - "después", - "ahora", - "recientemente", - // Question/request words - "que", - "qué", - "cómo", - "cuando", - "cuándo", - "donde", - "dónde", - "porqué", - "favor", - "ayuda", -]); - -const STOP_WORDS_PT = new Set([ - // Articles and determiners - "o", - "a", - "os", - "as", - "um", - "uma", - "uns", - "umas", - "este", - "esta", - "esse", - "essa", - // Pronouns - "eu", - "me", - "meu", - "minha", - "nos", - "nós", - "você", - "vocês", - "ele", - "ela", - "eles", - "elas", - // Prepositions and conjunctions - "de", - "do", - "da", - "em", - "com", - "por", - "para", - "sobre", - "entre", - "e", - "ou", - "mas", - "se", - "porque", - "como", - // Common verbs / auxiliaries - "é", - "são", - "foi", - "foram", - "ser", - "estar", - "ter", - "fazer", - // Time references (vague) - "ontem", - "hoje", - "amanhã", - "antes", - "depois", - "agora", - "recentemente", - // Question/request words - "que", - "quê", - "quando", - "onde", - "porquê", - "favor", - "ajuda", -]); - -const STOP_WORDS_AR = new Set([ - // Articles and connectors - "ال", - "و", - "أو", - "لكن", - "ثم", - "بل", - // Pronouns / references - "أنا", - "نحن", - "هو", - "هي", - "هم", - "هذا", - "هذه", - "ذلك", - "تلك", - "هنا", - "هناك", - // Common prepositions - "من", - "إلى", - "الى", - "في", - "على", - "عن", - "مع", - "بين", - "ل", - "ب", - "ك", - // Common auxiliaries / vague verbs - "كان", - "كانت", - "يكون", - "تكون", - "صار", - "أصبح", - "يمكن", - "ممكن", - // Time references (vague) - "بالأمس", - "امس", - "اليوم", - "غدا", - "الآن", - "قبل", - "بعد", - "مؤخرا", - // Question/request words - "لماذا", - "كيف", - "ماذا", - "متى", - "أين", - "هل", - "من فضلك", - "فضلا", - "ساعد", -]); - -const STOP_WORDS_KO = new Set([ - // Particles (조사) - "은", - "는", - "이", - "가", - "을", - "를", - "의", - "에", - "에서", - "로", - "으로", - "와", - "과", - "도", - "만", - "까지", - "부터", - "한테", - "에게", - "께", - "처럼", - "같이", - "보다", - "마다", - "밖에", - "대로", - // Pronouns (대명사) - "나", - "나는", - "내가", - "나를", - "너", - "우리", - "저", - "저희", - "그", - "그녀", - "그들", - "이것", - "저것", - "그것", - "여기", - "저기", - "거기", - // Common verbs / auxiliaries (일반 동사/보조 동사) - "있다", - "없다", - "하다", - "되다", - "이다", - "아니다", - "보다", - "주다", - "오다", - "가다", - // Nouns (의존 명사 / vague) - "것", - "거", - "등", - "수", - "때", - "곳", - "중", - "분", - // Adverbs - "잘", - "더", - "또", - "매우", - "정말", - "아주", - "많이", - "너무", - "좀", - // Conjunctions - "그리고", - "하지만", - "그래서", - "그런데", - "그러나", - "또는", - "그러면", - // Question words - "왜", - "어떻게", - "뭐", - "언제", - "어디", - "누구", - "무엇", - "어떤", - // Time (vague) - "어제", - "오늘", - "내일", - "최근", - "지금", - "아까", - "나중", - "전에", - // Request words - "제발", - "부탁", -]); - -// Common Korean trailing particles to strip from words for tokenization -// Sorted by descending length so longest-match-first is guaranteed. -const KO_TRAILING_PARTICLES = [ - "에서", - "으로", - "에게", - "한테", - "처럼", - "같이", - "보다", - "까지", - "부터", - "마다", - "밖에", - "대로", - "은", - "는", - "이", - "가", - "을", - "를", - "의", - "에", - "로", - "와", - "과", - "도", - "만", -].toSorted((a, b) => b.length - a.length); - -function stripKoreanTrailingParticle(token: string): string | null { - for (const particle of KO_TRAILING_PARTICLES) { - if (token.length > particle.length && token.endsWith(particle)) { - return token.slice(0, -particle.length); - } - } - return null; -} - -function isUsefulKoreanStem(stem: string): boolean { - // Prevent bogus one-syllable stems from words like "논의" -> "논". - if (/[\uac00-\ud7af]/.test(stem)) { - return stem.length >= 2; - } - // Keep stripped ASCII stems for mixed tokens like "API를" -> "api". - return /^[a-z0-9_]+$/i.test(stem); -} - -const STOP_WORDS_JA = new Set([ - // Pronouns and references - "これ", - "それ", - "あれ", - "この", - "その", - "あの", - "ここ", - "そこ", - "あそこ", - // Common auxiliaries / vague verbs - "する", - "した", - "して", - "です", - "ます", - "いる", - "ある", - "なる", - "できる", - // Particles / connectors - "の", - "こと", - "もの", - "ため", - "そして", - "しかし", - "また", - "でも", - "から", - "まで", - "より", - "だけ", - // Question words - "なぜ", - "どう", - "何", - "いつ", - "どこ", - "誰", - "どれ", - // Time (vague) - "昨日", - "今日", - "明日", - "最近", - "今", - "さっき", - "前", - "後", -]); - -const STOP_WORDS_ZH = new Set([ - // Pronouns - "我", - "我们", - "你", - "你们", - "他", - "她", - "它", - "他们", - "这", - "那", - "这个", - "那个", - "这些", - "那些", - // Auxiliary words - "的", - "了", - "着", - "过", - "得", - "地", - "吗", - "呢", - "吧", - "啊", - "呀", - "嘛", - "啦", - // Verbs (common, vague) - "是", - "有", - "在", - "被", - "把", - "给", - "让", - "用", - "到", - "去", - "来", - "做", - "说", - "看", - "找", - "想", - "要", - "能", - "会", - "可以", - // Prepositions and conjunctions - "和", - "与", - "或", - "但", - "但是", - "因为", - "所以", - "如果", - "虽然", - "而", - "也", - "都", - "就", - "还", - "又", - "再", - "才", - "只", - // Time (vague) - "之前", - "以前", - "之后", - "以后", - "刚才", - "现在", - "昨天", - "今天", - "明天", - "最近", - // Vague references - "东西", - "事情", - "事", - "什么", - "哪个", - "哪些", - "怎么", - "为什么", - "多少", - // Question/request words - "请", - "帮", - "帮忙", - "告诉", -]); - -export function isQueryStopWordToken(token: string): boolean { - return ( - STOP_WORDS_EN.has(token) || - STOP_WORDS_ES.has(token) || - STOP_WORDS_PT.has(token) || - STOP_WORDS_AR.has(token) || - STOP_WORDS_ZH.has(token) || - STOP_WORDS_KO.has(token) || - STOP_WORDS_JA.has(token) - ); -} - -/** - * Check if a token looks like a meaningful keyword. - * Returns false for short tokens, numbers-only, etc. - */ -function isValidKeyword(token: string): boolean { - if (!token || token.length === 0) { - return false; - } - // Skip very short English words (likely stop words or fragments) - if (/^[a-zA-Z]+$/.test(token) && token.length < 3) { - return false; - } - // Skip pure numbers (not useful for semantic search) - if (/^\d+$/.test(token)) { - return false; - } - // Skip tokens that are all punctuation - if (/^[\p{P}\p{S}]+$/u.test(token)) { - return false; - } - return true; -} - -/** - * Simple tokenizer that handles English, Chinese, Korean, and Japanese text. - * For Chinese, we do character-based splitting since we don't have a proper segmenter. - * For English, we split on whitespace and punctuation. - */ -function tokenize(text: string, opts?: { ftsTokenizer?: "unicode61" | "trigram" }): string[] { - const useTrigram = opts?.ftsTokenizer === "trigram"; - const tokens: string[] = []; - const normalized = normalizeLowercaseStringOrEmpty(text); - - // Split into segments (English words, Chinese character sequences, etc.) - const segments = normalized.split(/[\s\p{P}]+/u).filter(Boolean); - - for (const segment of segments) { - // Japanese text often mixes scripts (kanji/kana/ASCII) without spaces. - // Extract script-specific chunks so technical terms like "API" / "バグ" are retained. - if (/[\u3040-\u30ff]/.test(segment)) { - const jpParts = - segment.match(/[a-z0-9_]+|[\u30a0-\u30ffー]+|[\u4e00-\u9fff]+|[\u3040-\u309f]{2,}/g) ?? []; - for (const part of jpParts) { - if (/^[\u4e00-\u9fff]+$/.test(part)) { - tokens.push(part); - if (!useTrigram) { - for (let i = 0; i < part.length - 1; i++) { - tokens.push(part[i] + part[i + 1]); - } - } - } else { - tokens.push(part); - } - } - } else if (/[\u4e00-\u9fff]/.test(segment)) { - // Check if segment contains CJK characters (Chinese) - const chars = Array.from(segment).filter((c) => /[\u4e00-\u9fff]/.test(c)); - if (useTrigram) { - // In trigram mode, push the whole contiguous CJK block (mirroring the - // Japanese kanji path). SQLite's trigram FTS requires at least 3 characters - // per query term — individual characters silently return no results. - const block = chars.join(""); - if (block.length > 0) { - tokens.push(block); - } - } else { - // Default mode: unigrams + bigrams for phrase matching - tokens.push(...chars); - for (let i = 0; i < chars.length - 1; i++) { - tokens.push(chars[i] + chars[i + 1]); - } - } - } else if (/[\uac00-\ud7af\u3131-\u3163]/.test(segment)) { - // For Korean (Hangul syllables and jamo), keep the word as-is unless it is - // effectively a stop word once trailing particles are removed. - const stem = stripKoreanTrailingParticle(segment); - const stemIsStopWord = stem !== null && STOP_WORDS_KO.has(stem); - if (!STOP_WORDS_KO.has(segment) && !stemIsStopWord) { - tokens.push(segment); - } - // Also emit particle-stripped stems when they are useful keywords. - if (stem && !STOP_WORDS_KO.has(stem) && isUsefulKoreanStem(stem)) { - tokens.push(stem); - } - } else { - // For non-CJK, keep as single token - tokens.push(segment); - } - } - - return tokens; -} - -/** - * Extract keywords from a conversational query for FTS search. - * - * Examples: - * - "that thing we discussed about the API" → ["discussed", "API"] - * - "之前讨论的那个方案" → ["讨论", "方案"] - * - "what was the solution for the bug" → ["solution", "bug"] - */ -export function extractKeywords( - query: string, - opts?: { ftsTokenizer?: "unicode61" | "trigram" }, -): string[] { - const tokens = tokenize(query, opts); - const keywords: string[] = []; - const seen = new Set(); - - for (const token of tokens) { - // Skip stop words - if (isQueryStopWordToken(token)) { - continue; - } - // Skip invalid keywords - if (!isValidKeyword(token)) { - continue; - } - // Skip duplicates - if (seen.has(token)) { - continue; - } - seen.add(token); - keywords.push(token); - } - - return keywords; -} - -/** - * Expand a query for FTS search. - * Returns both the original query and extracted keywords for OR-matching. - * - * @param query - User's original query - * @returns Object with original query and extracted keywords - */ -export function expandQueryForFts( - query: string, - opts?: { ftsTokenizer?: "unicode61" | "trigram" }, -): { - original: string; - keywords: string[]; - expanded: string; -} { - const original = query.trim(); - const keywords = extractKeywords(original, opts); - - // Build expanded query: original terms OR extracted keywords - // This ensures both exact matches and keyword matches are found - const expanded = keywords.length > 0 ? `${original} OR ${keywords.join(" OR ")}` : original; - - return { original, keywords, expanded }; -} - -/** - * Type for an optional LLM-based query expander. - * Can be provided to enhance keyword extraction with semantic understanding. - */ -export type LlmQueryExpander = (query: string) => Promise; - -/** - * Expand query with optional LLM assistance. - * Falls back to local extraction if LLM is unavailable or fails. - */ -export async function expandQueryWithLlm( - query: string, - llmExpander?: LlmQueryExpander, - opts?: { ftsTokenizer?: "unicode61" | "trigram" }, -): Promise { - // If LLM expander is provided, try it first - if (llmExpander) { - try { - const llmKeywords = await llmExpander(query); - if (llmKeywords.length > 0) { - return llmKeywords; - } - } catch { - // LLM failed, fall back to local extraction - } - } - - // Fall back to local keyword extraction - return extractKeywords(query, opts); -} +export * from "../../../packages/memory-host-sdk/src/host/query-expansion.js"; diff --git a/src/memory-host-sdk/host/read-file.ts b/src/memory-host-sdk/host/read-file.ts index e7f01102168..af586356207 100644 --- a/src/memory-host-sdk/host/read-file.ts +++ b/src/memory-host-sdk/host/read-file.ts @@ -1,107 +1 @@ -import fs from "node:fs/promises"; -import path from "node:path"; -import { resolveAgentContextLimits, resolveAgentWorkspaceDir } from "../../agents/agent-scope.js"; -import { resolveMemorySearchConfig } from "../../agents/memory-search.js"; -import type { OpenClawConfig } from "../../config/types.openclaw.js"; -import { isFileMissingError, statRegularFile } from "./fs-utils.js"; -import { isMemoryPath, normalizeExtraMemoryPaths } from "./internal.js"; -import { - buildMemoryReadResult, - DEFAULT_MEMORY_READ_LINES, - type MemoryReadResult, -} from "./read-file-shared.js"; - -export async function readMemoryFile(params: { - workspaceDir: string; - extraPaths?: string[]; - relPath: string; - from?: number; - lines?: number; - defaultLines?: number; - maxChars?: number; -}): Promise { - const rawPath = params.relPath.trim(); - if (!rawPath) { - throw new Error("path required"); - } - const absPath = path.isAbsolute(rawPath) - ? path.resolve(rawPath) - : path.resolve(params.workspaceDir, rawPath); - const relPath = path.relative(params.workspaceDir, absPath).replace(/\\/g, "/"); - const inWorkspace = relPath.length > 0 && !relPath.startsWith("..") && !path.isAbsolute(relPath); - const allowedWorkspace = inWorkspace && isMemoryPath(relPath); - let allowedAdditional = false; - if (!allowedWorkspace && (params.extraPaths?.length ?? 0) > 0) { - const additionalPaths = normalizeExtraMemoryPaths(params.workspaceDir, params.extraPaths); - for (const additionalPath of additionalPaths) { - try { - const stat = await fs.lstat(additionalPath); - if (stat.isSymbolicLink()) { - continue; - } - if (stat.isDirectory()) { - if (absPath === additionalPath || absPath.startsWith(`${additionalPath}${path.sep}`)) { - allowedAdditional = true; - break; - } - continue; - } - if (stat.isFile() && absPath === additionalPath && absPath.endsWith(".md")) { - allowedAdditional = true; - break; - } - } catch {} - } - } - if (!allowedWorkspace && !allowedAdditional) { - throw new Error("path required"); - } - if (!absPath.endsWith(".md")) { - throw new Error("path required"); - } - const statResult = await statRegularFile(absPath); - if (statResult.missing) { - return { text: "", path: relPath }; - } - let content: string; - try { - content = await fs.readFile(absPath, "utf-8"); - } catch (err) { - if (isFileMissingError(err)) { - return { text: "", path: relPath }; - } - throw err; - } - return buildMemoryReadResult({ - content, - relPath, - from: params.from, - lines: params.lines, - defaultLines: params.defaultLines ?? DEFAULT_MEMORY_READ_LINES, - maxChars: params.maxChars, - suggestReadFallback: allowedWorkspace, - }); -} - -export async function readAgentMemoryFile(params: { - cfg: OpenClawConfig; - agentId: string; - relPath: string; - from?: number; - lines?: number; -}): Promise { - const settings = resolveMemorySearchConfig(params.cfg, params.agentId); - if (!settings) { - throw new Error("memory search disabled"); - } - const contextLimits = resolveAgentContextLimits(params.cfg, params.agentId); - return await readMemoryFile({ - workspaceDir: resolveAgentWorkspaceDir(params.cfg, params.agentId), - extraPaths: settings.extraPaths, - relPath: params.relPath, - from: params.from, - lines: params.lines, - defaultLines: contextLimits?.memoryGetDefaultLines, - maxChars: contextLimits?.memoryGetMaxChars, - }); -} +export * from "../../../packages/memory-host-sdk/src/host/read-file.js"; diff --git a/src/memory-host-sdk/host/session-files.ts b/src/memory-host-sdk/host/session-files.ts index aa53eb8e89f..0b49b8bb276 100644 --- a/src/memory-host-sdk/host/session-files.ts +++ b/src/memory-host-sdk/host/session-files.ts @@ -1,565 +1 @@ -import fsSync from "node:fs"; -import fs from "node:fs/promises"; -import path from "node:path"; -import { stripInternalRuntimeContext } from "../../agents/internal-runtime-context.js"; -import { isHeartbeatUserMessage } from "../../auto-reply/heartbeat-filter.js"; -import { HEARTBEAT_PROMPT } from "../../auto-reply/heartbeat.js"; -import { stripInboundMetadata } from "../../auto-reply/reply/strip-inbound-meta.js"; -import { HEARTBEAT_TOKEN, isSilentReplyPayloadText } from "../../auto-reply/tokens.js"; -import { - isCompactionCheckpointTranscriptFileName, - isSessionArchiveArtifactName, - isUsageCountedSessionTranscriptFileName, -} from "../../config/sessions/artifacts.js"; -import { resolveSessionTranscriptsDirForAgent } from "../../config/sessions/paths.js"; -import { isExecCompletionEvent } from "../../infra/heartbeat-events-filter.js"; -import { redactSensitiveText } from "../../logging/redact.js"; -import { hasInterSessionUserProvenance } from "../../sessions/input-provenance.js"; -import { isCronRunSessionKey } from "../../sessions/session-key-utils.js"; -import { hashText } from "./hash.js"; - -const DREAMING_NARRATIVE_RUN_PREFIX = "dreaming-narrative-"; -// Keep the historical one-line-per-message export shape for normal turns, but -// wrap pathological long messages so downstream indexers never ingest a single -// toxic line. Wrapped continuation lines still map back to the same JSONL line. -// This limit applies to content only; the role label adds up to 11 chars. -const SESSION_EXPORT_CONTENT_WRAP_CHARS = 800; -const DIRECT_CRON_PROMPT_RE = /^\[cron:[^\]]+\]\s*/; - -export type SessionFileEntry = { - path: string; - absPath: string; - mtimeMs: number; - size: number; - hash: string; - content: string; - /** Maps each content line (0-indexed) to its 1-indexed JSONL source line. */ - lineMap: number[]; - /** Maps each content line (0-indexed) to epoch ms; 0 means unknown timestamp. */ - messageTimestampsMs: number[]; - /** True when this transcript belongs to an internal dreaming narrative run. */ - generatedByDreamingNarrative?: boolean; - /** True when this transcript belongs to an isolated cron run session. */ - generatedByCronRun?: boolean; -}; - -export type BuildSessionEntryOptions = { - /** Optional preclassification from a caller-managed dreaming transcript lookup. */ - generatedByDreamingNarrative?: boolean; - /** Optional preclassification from a caller-managed cron transcript lookup. */ - generatedByCronRun?: boolean; -}; - -export type SessionTranscriptClassification = { - dreamingNarrativeTranscriptPaths: ReadonlySet; - cronRunTranscriptPaths: ReadonlySet; -}; - -type SessionTranscriptStoreEntry = { - sessionFile?: unknown; - sessionId?: unknown; -}; - -function shouldSkipTranscriptFileForDreaming(absPath: string): boolean { - const fileName = path.basename(absPath); - return ( - isSessionArchiveArtifactName(fileName) || isCompactionCheckpointTranscriptFileName(fileName) - ); -} - -function isDreamingNarrativeBootstrapRecord(record: unknown): boolean { - if (!record || typeof record !== "object" || Array.isArray(record)) { - return false; - } - const candidate = record as { - type?: unknown; - customType?: unknown; - data?: unknown; - }; - if ( - candidate.type !== "custom" || - candidate.customType !== "openclaw:bootstrap-context:full" || - !candidate.data || - typeof candidate.data !== "object" || - Array.isArray(candidate.data) - ) { - return false; - } - const runId = (candidate.data as { runId?: unknown }).runId; - return typeof runId === "string" && runId.startsWith(DREAMING_NARRATIVE_RUN_PREFIX); -} - -function hasDreamingNarrativeRunId(value: unknown): boolean { - return typeof value === "string" && value.startsWith(DREAMING_NARRATIVE_RUN_PREFIX); -} - -function isDreamingNarrativeGeneratedRecord(record: unknown): boolean { - if (isDreamingNarrativeBootstrapRecord(record)) { - return true; - } - if (!record || typeof record !== "object" || Array.isArray(record)) { - return false; - } - const candidate = record as { - runId?: unknown; - sessionKey?: unknown; - data?: unknown; - }; - if ( - hasDreamingNarrativeRunId(candidate.runId) || - hasDreamingNarrativeRunId(candidate.sessionKey) - ) { - return true; - } - if (!candidate.data || typeof candidate.data !== "object" || Array.isArray(candidate.data)) { - return false; - } - const nested = candidate.data as { - runId?: unknown; - sessionKey?: unknown; - }; - return hasDreamingNarrativeRunId(nested.runId) || hasDreamingNarrativeRunId(nested.sessionKey); -} - -function isDreamingNarrativeSessionStoreKey(sessionKey: string): boolean { - const trimmed = sessionKey.trim(); - if (!trimmed) { - return false; - } - const firstSeparator = trimmed.indexOf(":"); - if (firstSeparator < 0) { - return trimmed.startsWith(DREAMING_NARRATIVE_RUN_PREFIX); - } - const secondSeparator = trimmed.indexOf(":", firstSeparator + 1); - const sessionSegment = secondSeparator < 0 ? trimmed : trimmed.slice(secondSeparator + 1); - return sessionSegment.startsWith(DREAMING_NARRATIVE_RUN_PREFIX); -} - -function normalizeComparablePath(pathname: string): string { - const resolved = path.resolve(pathname); - return process.platform === "win32" ? resolved.toLowerCase() : resolved; -} - -export function normalizeSessionTranscriptPathForComparison(pathname: string): string { - return normalizeComparablePath(pathname); -} - -function resolveSessionStoreTranscriptPath( - sessionsDir: string, - entry: { sessionFile?: unknown; sessionId?: unknown } | undefined, -): string | null { - if (typeof entry?.sessionFile === "string" && entry.sessionFile.trim().length > 0) { - const sessionFile = entry.sessionFile.trim(); - const resolved = path.isAbsolute(sessionFile) - ? sessionFile - : path.resolve(sessionsDir, sessionFile); - return normalizeComparablePath(resolved); - } - if (typeof entry?.sessionId === "string" && entry.sessionId.trim().length > 0) { - return normalizeComparablePath(path.join(sessionsDir, `${entry.sessionId.trim()}.jsonl`)); - } - return null; -} - -export function loadDreamingNarrativeTranscriptPathSetForSessionsDir( - sessionsDir: string, -): ReadonlySet { - return loadSessionTranscriptClassificationForSessionsDir(sessionsDir) - .dreamingNarrativeTranscriptPaths; -} - -export function loadSessionTranscriptClassificationForSessionsDir( - sessionsDir: string, -): SessionTranscriptClassification { - const storePath = path.join(sessionsDir, "sessions.json"); - const store = readSessionTranscriptClassificationStore(storePath); - const dreamingTranscriptPaths = new Set(); - const cronRunTranscriptPaths = new Set(); - for (const [sessionKey, entry] of Object.entries(store)) { - const transcriptPath = resolveSessionStoreTranscriptPath(sessionsDir, entry); - if (!transcriptPath) { - continue; - } - if (isDreamingNarrativeSessionStoreKey(sessionKey)) { - dreamingTranscriptPaths.add(transcriptPath); - } - if (isCronRunSessionKey(sessionKey)) { - cronRunTranscriptPaths.add(transcriptPath); - } - } - return { - dreamingNarrativeTranscriptPaths: dreamingTranscriptPaths, - cronRunTranscriptPaths, - }; -} - -function readSessionTranscriptClassificationStore( - storePath: string, -): Record { - try { - const parsed = JSON.parse(fsSync.readFileSync(storePath, "utf-8")) as unknown; - if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { - return {}; - } - return parsed as Record; - } catch { - return {}; - } -} - -export function loadDreamingNarrativeTranscriptPathSetForAgent( - agentId: string, -): ReadonlySet { - return loadSessionTranscriptClassificationForAgent(agentId).dreamingNarrativeTranscriptPaths; -} - -export function loadSessionTranscriptClassificationForAgent( - agentId: string, -): SessionTranscriptClassification { - return loadSessionTranscriptClassificationForSessionsDir( - resolveSessionTranscriptsDirForAgent(agentId), - ); -} - -function classifySessionTranscriptFromSessionStore(absPath: string): { - generatedByDreamingNarrative: boolean; - generatedByCronRun: boolean; -} { - const sessionsDir = path.dirname(absPath); - const normalizedAbsPath = normalizeComparablePath(absPath); - const classification = loadSessionTranscriptClassificationForSessionsDir(sessionsDir); - return { - generatedByDreamingNarrative: - classification.dreamingNarrativeTranscriptPaths.has(normalizedAbsPath), - generatedByCronRun: classification.cronRunTranscriptPaths.has(normalizedAbsPath), - }; -} - -export async function listSessionFilesForAgent(agentId: string): Promise { - const dir = resolveSessionTranscriptsDirForAgent(agentId); - try { - const entries = await fs.readdir(dir, { withFileTypes: true }); - return entries - .filter((entry) => entry.isFile()) - .map((entry) => entry.name) - .filter((name) => isUsageCountedSessionTranscriptFileName(name)) - .map((name) => path.join(dir, name)); - } catch { - return []; - } -} - -export function sessionPathForFile(absPath: string): string { - return path.join("sessions", path.basename(absPath)).replace(/\\/g, "/"); -} - -async function logSessionFileReadFailure(absPath: string, err: unknown): Promise { - const { createSubsystemLogger } = await import("../../logging/subsystem.js"); - createSubsystemLogger("memory").debug(`Failed reading session file ${absPath}: ${String(err)}`); -} - -function normalizeSessionText(value: string): string { - return value - .replace(/\s*\n+\s*/g, " ") - .replace(/\s+/g, " ") - .trim(); -} - -function collectRawSessionText(content: unknown): string | null { - if (typeof content === "string") { - return content; - } - if (!Array.isArray(content)) { - return null; - } - const parts: string[] = []; - for (const block of content) { - if (!block || typeof block !== "object") { - continue; - } - const record = block as { type?: unknown; text?: unknown }; - if (record.type === "text" && typeof record.text === "string") { - parts.push(record.text); - } - } - return parts.length > 0 ? parts.join("\n") : null; -} - -function isHighSurrogate(code: number): boolean { - return code >= 0xd800 && code <= 0xdbff; -} - -function isLowSurrogate(code: number): boolean { - return code >= 0xdc00 && code <= 0xdfff; -} - -function splitLongSessionLine( - text: string, - maxChars: number = SESSION_EXPORT_CONTENT_WRAP_CHARS, -): string[] { - const normalized = text.trim(); - if (!normalized) { - return []; - } - if (normalized.length <= maxChars) { - return [normalized]; - } - - const segments: string[] = []; - let cursor = 0; - while (cursor < normalized.length) { - const remaining = normalized.length - cursor; - if (remaining <= maxChars) { - segments.push(normalized.slice(cursor).trim()); - break; - } - - const limit = cursor + maxChars; - let splitAt = limit; - for (let index = limit; index > cursor; index -= 1) { - if (normalized[index] === " ") { - splitAt = index; - break; - } - } - if ( - splitAt < normalized.length && - splitAt > cursor && - isHighSurrogate(normalized.charCodeAt(splitAt - 1)) && - isLowSurrogate(normalized.charCodeAt(splitAt)) - ) { - splitAt -= 1; - } - segments.push(normalized.slice(cursor, splitAt).trim()); - cursor = splitAt; - while (cursor < normalized.length && normalized[cursor] === " ") { - cursor += 1; - } - } - - return segments.filter(Boolean); -} - -function renderSessionExportLines(label: string, text: string): string[] { - return splitLongSessionLine(text).map((segment) => `${label}: ${segment}`); -} - -/** - * Strip OpenClaw-injected inbound metadata envelopes from a raw text block. - * - * User-role messages arriving from external channels (Telegram, Discord, - * Slack, …) are stored with a multi-line prefix containing Conversation info, - * Sender info, and other AI-facing metadata blocks. These envelopes must be - * removed BEFORE normalization, because `stripInboundMetadata` relies on - * newline structure and fenced `json` code fences to locate sentinels; once - * `normalizeSessionText` collapses newlines into spaces, stripping is - * impossible. - * - * See: https://github.com/openclaw/openclaw/issues/63921 - */ -function stripInboundMetadataForUserRole(text: string, role: "user" | "assistant"): string { - if (role !== "user") { - return text; - } - return stripInboundMetadata(text); -} - -const GENERATED_SYSTEM_MESSAGE_RE = /^System(?: \(untrusted\))?: \[[^\]]+\]\s*/; - -function isGeneratedSystemWrapperMessage(text: string, role: "user" | "assistant"): boolean { - if (role !== "user") { - return false; - } - return GENERATED_SYSTEM_MESSAGE_RE.test(text); -} - -function isGeneratedCronPromptMessage(text: string, role: "user" | "assistant"): boolean { - if (role !== "user") { - return false; - } - return DIRECT_CRON_PROMPT_RE.test(text); -} - -function isGeneratedHeartbeatPromptMessage(text: string, role: "user" | "assistant"): boolean { - return role === "user" && isHeartbeatUserMessage({ role, content: text }, HEARTBEAT_PROMPT); -} - -function sanitizeSessionText(text: string, role: "user" | "assistant"): string | null { - const strippedInbound = stripInboundMetadataForUserRole(text, role); - const strippedInternal = stripInternalRuntimeContext(strippedInbound); - const normalized = normalizeSessionText(strippedInternal); - if (!normalized) { - return null; - } - if (isGeneratedSystemWrapperMessage(normalized, role)) { - return null; - } - if (isGeneratedCronPromptMessage(normalized, role)) { - return null; - } - if (isGeneratedHeartbeatPromptMessage(normalized, role)) { - return null; - } - if (isSilentReplyPayloadText(normalized)) { - return null; - } - // Assistant-side machinery acks: HEARTBEAT_OK is the canonical "all clear, - // nothing to do" reply to a heartbeat tick. Drop on the assistant side - // directly so we do not have to rely on cross-message coupling with the - // preceding user message (which a real user could spoof). - if (role === "assistant" && normalized === HEARTBEAT_TOKEN) { - return null; - } - const withoutSystemEnvelope = normalized.replace(GENERATED_SYSTEM_MESSAGE_RE, "").trim(); - if (isExecCompletionEvent(withoutSystemEnvelope)) { - return null; - } - return normalized; -} - -export function extractSessionText( - content: unknown, - role: "user" | "assistant" = "assistant", -): string | null { - const rawText = collectRawSessionText(content); - if (rawText === null) { - return null; - } - return sanitizeSessionText(rawText, role); -} - -function parseSessionTimestampMs( - record: { timestamp?: unknown }, - message: { timestamp?: unknown }, -): number { - const candidates = [message.timestamp, record.timestamp]; - for (const value of candidates) { - if (typeof value === "number" && Number.isFinite(value)) { - const ms = value > 0 && value < 1e11 ? value * 1000 : value; - if (Number.isFinite(ms) && ms > 0) { - return ms; - } - } - if (typeof value === "string") { - const parsed = Date.parse(value); - if (Number.isFinite(parsed) && parsed > 0) { - return parsed; - } - } - } - return 0; -} - -export async function buildSessionEntry( - absPath: string, - opts: BuildSessionEntryOptions = {}, -): Promise { - try { - const stat = await fs.stat(absPath); - if (shouldSkipTranscriptFileForDreaming(absPath)) { - return { - path: sessionPathForFile(absPath), - absPath, - mtimeMs: stat.mtimeMs, - size: stat.size, - hash: hashText("\n\n"), - content: "", - lineMap: [], - messageTimestampsMs: [], - }; - } - const raw = await fs.readFile(absPath, "utf-8"); - const lines = raw.split("\n"); - const collected: string[] = []; - const lineMap: number[] = []; - const messageTimestampsMs: number[] = []; - const sessionStoreClassification = - opts.generatedByDreamingNarrative === undefined || opts.generatedByCronRun === undefined - ? classifySessionTranscriptFromSessionStore(absPath) - : null; - let generatedByDreamingNarrative = - opts.generatedByDreamingNarrative ?? - sessionStoreClassification?.generatedByDreamingNarrative ?? - false; - const generatedByCronRun = - opts.generatedByCronRun ?? sessionStoreClassification?.generatedByCronRun ?? false; - for (let jsonlIdx = 0; jsonlIdx < lines.length; jsonlIdx++) { - const line = lines[jsonlIdx]; - if (!line.trim()) { - continue; - } - let record: unknown; - try { - record = JSON.parse(line); - } catch { - continue; - } - if (!generatedByDreamingNarrative && isDreamingNarrativeGeneratedRecord(record)) { - generatedByDreamingNarrative = true; - } - if ( - !record || - typeof record !== "object" || - (record as { type?: unknown }).type !== "message" - ) { - continue; - } - const message = (record as { message?: unknown }).message as - | { role?: unknown; content?: unknown; provenance?: unknown } - | undefined; - if (!message || typeof message.role !== "string") { - continue; - } - if (message.role !== "user" && message.role !== "assistant") { - continue; - } - if (message.role === "user" && hasInterSessionUserProvenance(message)) { - continue; - } - const rawText = collectRawSessionText(message.content); - if (rawText === null) { - continue; - } - const text = sanitizeSessionText(rawText, message.role); - if (!text) { - // Assistant-side machinery (silent replies, system wrappers) is already - // dropped by sanitizeSessionText. We deliberately do NOT use the prior - // user message's pattern-match to drop the next assistant message: - // user-typed text can match those same patterns (`[cron:...]`, - // `System (untrusted): ...`) and a cross-message drop would let users - // exfiltrate real assistant replies from the dreaming corpus by - // prefixing their own prompt. See PR #70737 review (aisle-research-bot). - continue; - } - if (generatedByDreamingNarrative || generatedByCronRun) { - continue; - } - const safe = redactSensitiveText(text, { mode: "tools" }); - const label = message.role === "user" ? "User" : "Assistant"; - const renderedLines = renderSessionExportLines(label, safe); - const timestampMs = parseSessionTimestampMs( - record as { timestamp?: unknown }, - message as { timestamp?: unknown }, - ); - collected.push(...renderedLines); - lineMap.push(...renderedLines.map(() => jsonlIdx + 1)); - messageTimestampsMs.push(...renderedLines.map(() => timestampMs)); - } - const content = collected.join("\n"); - return { - path: sessionPathForFile(absPath), - absPath, - mtimeMs: stat.mtimeMs, - size: stat.size, - hash: hashText(content + "\n" + lineMap.join(",") + "\n" + messageTimestampsMs.join(",")), - content, - lineMap, - messageTimestampsMs, - ...(generatedByDreamingNarrative ? { generatedByDreamingNarrative: true } : {}), - ...(generatedByCronRun ? { generatedByCronRun: true } : {}), - }; - } catch (err) { - void logSessionFileReadFailure(absPath, err); - return null; - } -} +export * from "../../../packages/memory-host-sdk/src/host/session-files.js"; diff --git a/src/memory-host-sdk/host/types.ts b/src/memory-host-sdk/host/types.ts index 7c99da2d32f..ab03cbea60c 100644 --- a/src/memory-host-sdk/host/types.ts +++ b/src/memory-host-sdk/host/types.ts @@ -1,107 +1 @@ -export type MemorySource = "memory" | "sessions"; - -export type MemorySearchResult = { - path: string; - startLine: number; - endLine: number; - score: number; - vectorScore?: number; - textScore?: number; - snippet: string; - source: MemorySource; - citation?: string; -}; - -export type MemoryEmbeddingProbeResult = { - ok: boolean; - error?: string; - checked?: boolean; - cached?: boolean; - checkedAtMs?: number; - cacheExpiresAtMs?: number; -}; - -export type MemorySyncProgressUpdate = { - completed: number; - total: number; - label?: string; -}; - -export type MemorySearchRuntimeDebug = { - backend: "builtin" | "qmd"; - configuredMode?: string; - effectiveMode?: string; - fallback?: string; -}; - -export type MemoryReadResult = { - text: string; - path: string; - truncated?: boolean; - from?: number; - lines?: number; - nextFrom?: number; -}; - -export type MemoryProviderStatus = { - backend: "builtin" | "qmd"; - provider: string; - model?: string; - requestedProvider?: string; - files?: number; - chunks?: number; - dirty?: boolean; - workspaceDir?: string; - dbPath?: string; - extraPaths?: string[]; - sources?: MemorySource[]; - sourceCounts?: Array<{ source: MemorySource; files: number; chunks: number }>; - cache?: { enabled: boolean; entries?: number; maxEntries?: number }; - fts?: { enabled: boolean; available: boolean; error?: string }; - fallback?: { from: string; reason?: string }; - vector?: { - enabled: boolean; - available?: boolean; - extensionPath?: string; - loadError?: string; - dims?: number; - }; - batch?: { - enabled: boolean; - failures: number; - limit: number; - wait: boolean; - concurrency: number; - pollIntervalMs: number; - timeoutMs: number; - lastError?: string; - lastProvider?: string; - }; - custom?: Record; -}; - -export interface MemorySearchManager { - search( - query: string, - opts?: { - maxResults?: number; - minScore?: number; - sessionKey?: string; - qmdSearchModeOverride?: "query" | "search" | "vsearch"; - onDebug?: (debug: MemorySearchRuntimeDebug) => void; - sources?: MemorySource[]; - }, - ): Promise; - readFile(params: { relPath: string; from?: number; lines?: number }): Promise; - status(): MemoryProviderStatus; - sync?(params?: { - reason?: string; - force?: boolean; - sessionFiles?: string[]; - progress?: (update: MemorySyncProgressUpdate) => void; - }): Promise; - getCachedEmbeddingAvailability?(): MemoryEmbeddingProbeResult | null; - probeEmbeddingAvailability(): Promise; - probeVectorAvailability(): Promise; - close?(): Promise; -} +export * from "../../../packages/memory-host-sdk/src/host/types.js";