mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:50:43 +00:00
refactor(memory): bridge host sdk duplicates
This commit is contained in:
@@ -1,15 +1,38 @@
|
||||
// Re-exported host runtime surface, bridged through one module so plugin
// code has a single import point: runtime values first, then type-only
// exports (kept separate so `isolatedModules` builds erase them cleanly).
export {
  CHARS_PER_TOKEN_ESTIMATE,
  DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES,
  DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS,
  applyWindowsSpawnProgramPolicy,
  configureSqliteWalMaintenance,
  createSubsystemLogger,
  detectMime,
  estimateStringChars,
  installProcessWarningFilter,
  materializeWindowsSpawnProgram,
  redactSensitiveText,
  resolveGlobalSingleton,
  resolveUserPath,
  resolveWindowsExecutablePath,
  resolveWindowsSpawnProgram,
  resolveWindowsSpawnProgramCandidate,
  runTasksWithConcurrency,
  shortenHomeInString,
  shortenHomePath,
  shouldIgnoreWarning,
  splitShellArgs,
  truncateUtf16Safe,
  writeFileWithinRoot,
} from "./openclaw-runtime.js";

export type {
  ProcessWarning,
  ResolveWindowsSpawnProgramCandidateParams,
  ResolveWindowsSpawnProgramParams,
  SqliteWalMaintenance,
  SqliteWalMaintenanceOptions,
  WindowsSpawnCandidateResolution,
  WindowsSpawnInvocation,
  WindowsSpawnProgram,
  WindowsSpawnProgramCandidate,
  WindowsSpawnResolution,
} from "./openclaw-runtime.js";
|
||||
|
||||
@@ -78,6 +78,20 @@ export { writeFileWithinRoot } from "../../../../src/infra/fs-safe.js";
|
||||
export { fetchWithSsrFGuard } from "../../../../src/infra/net/fetch-guard.js";
|
||||
export { shouldUseEnvHttpProxyForUrl } from "../../../../src/infra/net/proxy-env.js";
|
||||
export { ssrfPolicyFromHttpBaseUrlAllowedHostname } from "../../../../src/infra/net/ssrf.js";
|
||||
export {
|
||||
DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES,
|
||||
DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS,
|
||||
configureSqliteWalMaintenance,
|
||||
} from "../../../../src/infra/sqlite-wal.js";
|
||||
export type {
|
||||
SqliteWalMaintenance,
|
||||
SqliteWalMaintenanceOptions,
|
||||
} from "../../../../src/infra/sqlite-wal.js";
|
||||
export {
|
||||
installProcessWarningFilter,
|
||||
shouldIgnoreWarning,
|
||||
} from "../../../../src/infra/warning-filter.js";
|
||||
export type { ProcessWarning } from "../../../../src/infra/warning-filter.js";
|
||||
export { redactSensitiveText } from "../../../../src/logging/redact.js";
|
||||
export { createSubsystemLogger } from "../../../../src/logging/subsystem.js";
|
||||
export { detectMime } from "../../../../src/media/mime.js";
|
||||
@@ -136,4 +150,20 @@ export {
|
||||
shortenHomePath,
|
||||
truncateUtf16Safe,
|
||||
} from "../../../../src/utils.js";
|
||||
export {
|
||||
applyWindowsSpawnProgramPolicy,
|
||||
materializeWindowsSpawnProgram,
|
||||
resolveWindowsExecutablePath,
|
||||
resolveWindowsSpawnProgram,
|
||||
resolveWindowsSpawnProgramCandidate,
|
||||
} from "../../../../src/plugin-sdk/windows-spawn.js";
|
||||
export type {
|
||||
ResolveWindowsSpawnProgramCandidateParams,
|
||||
ResolveWindowsSpawnProgramParams,
|
||||
WindowsSpawnCandidateResolution,
|
||||
WindowsSpawnInvocation,
|
||||
WindowsSpawnProgram,
|
||||
WindowsSpawnProgramCandidate,
|
||||
WindowsSpawnResolution,
|
||||
} from "../../../../src/plugin-sdk/windows-spawn.js";
|
||||
export { resolveGlobalSingleton } from "../../../../src/shared/global-singleton.js";
|
||||
|
||||
@@ -1,74 +1,6 @@
|
||||
import type { DatabaseSync } from "node:sqlite";
|
||||
|
||||
const DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES = 1000;
|
||||
const DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS = 30 * 60 * 1000;
|
||||
|
||||
type IntervalHandle = ReturnType<typeof setInterval> & {
|
||||
unref?: () => void;
|
||||
};
|
||||
|
||||
type SqliteWalCheckpointMode = "PASSIVE" | "FULL" | "RESTART" | "TRUNCATE";
|
||||
|
||||
export type SqliteWalMaintenance = {
|
||||
checkpoint: () => boolean;
|
||||
close: () => boolean;
|
||||
};
|
||||
|
||||
export type SqliteWalMaintenanceOptions = {
|
||||
autoCheckpointPages?: number;
|
||||
checkpointIntervalMs?: number;
|
||||
checkpointMode?: SqliteWalCheckpointMode;
|
||||
onCheckpointError?: (error: unknown) => void;
|
||||
};
|
||||
|
||||
function normalizeNonNegativeInteger(value: number, label: string): number {
|
||||
if (!Number.isInteger(value) || value < 0) {
|
||||
throw new Error(`${label} must be a non-negative integer`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
 * Puts `db` into WAL journal mode and wires up periodic checkpointing.
 *
 * @param db - An open node:sqlite database handle.
 * @param options - Optional overrides: autocheckpoint page count, checkpoint
 *   interval (0 disables the timer), checkpoint mode (default TRUNCATE), and
 *   an error callback for failed checkpoints.
 * @returns Handles to checkpoint on demand and to `close` (stops the timer
 *   and runs one final checkpoint, returning its success).
 * @throws If autoCheckpointPages or checkpointIntervalMs is not a
 *   non-negative integer.
 */
export function configureSqliteWalMaintenance(
  db: DatabaseSync,
  options: SqliteWalMaintenanceOptions = {},
): SqliteWalMaintenance {
  const autoCheckpointPages = normalizeNonNegativeInteger(
    options.autoCheckpointPages ?? DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES,
    "autoCheckpointPages",
  );
  const checkpointIntervalMs = normalizeNonNegativeInteger(
    options.checkpointIntervalMs ?? DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS,
    "checkpointIntervalMs",
  );
  const checkpointMode = options.checkpointMode ?? "TRUNCATE";

  db.exec("PRAGMA journal_mode = WAL;");
  db.exec(`PRAGMA wal_autocheckpoint = ${autoCheckpointPages};`);

  // Best-effort checkpoint: failures are routed to the caller's callback
  // (if any) instead of throwing from a timer tick.
  const checkpoint = (): boolean => {
    try {
      db.exec(`PRAGMA wal_checkpoint(${checkpointMode});`);
      return true;
    } catch (error) {
      options.onCheckpointError?.(error);
      return false;
    }
  };

  // An interval of 0 disables the periodic checkpoint entirely.
  let timer: IntervalHandle | null = null;
  if (checkpointIntervalMs > 0) {
    timer = setInterval(checkpoint, checkpointIntervalMs) as IntervalHandle;
    // unref so the timer never keeps the process alive on its own.
    timer.unref?.();
  }

  return {
    checkpoint,
    close: () => {
      if (timer) {
        clearInterval(timer);
        timer = null;
      }
      // One last checkpoint on close; its result is the close() result.
      return checkpoint();
    },
  };
}
|
||||
export {
|
||||
DEFAULT_SQLITE_WAL_AUTOCHECKPOINT_PAGES,
|
||||
DEFAULT_SQLITE_WAL_TRUNCATE_INTERVAL_MS,
|
||||
configureSqliteWalMaintenance,
|
||||
} from "./openclaw-runtime-io.js";
|
||||
export type { SqliteWalMaintenance, SqliteWalMaintenanceOptions } from "./openclaw-runtime-io.js";
|
||||
|
||||
@@ -1,105 +1,2 @@
|
||||
const warningFilterKey = Symbol.for("openclaw.warning-filter");
|
||||
|
||||
export type ProcessWarning = {
|
||||
code?: string;
|
||||
name?: string;
|
||||
message?: string;
|
||||
};
|
||||
|
||||
type ProcessWarningInstallState = {
|
||||
installed: boolean;
|
||||
};
|
||||
|
||||
function resolveWarningFilterState(): ProcessWarningInstallState {
|
||||
const globalStore = globalThis as Record<PropertyKey, unknown>;
|
||||
if (Object.prototype.hasOwnProperty.call(globalStore, warningFilterKey)) {
|
||||
return globalStore[warningFilterKey] as ProcessWarningInstallState;
|
||||
}
|
||||
const state = { installed: false };
|
||||
globalStore[warningFilterKey] = state;
|
||||
return state;
|
||||
}
|
||||
|
||||
export function shouldIgnoreWarning(warning: ProcessWarning): boolean {
|
||||
if (warning.code === "DEP0040" && warning.message?.includes("punycode")) {
|
||||
return true;
|
||||
}
|
||||
if (warning.code === "DEP0060" && warning.message?.includes("util._extend")) {
|
||||
return true;
|
||||
}
|
||||
if (
|
||||
warning.name === "ExperimentalWarning" &&
|
||||
warning.message?.includes("SQLite is an experimental feature")
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function normalizeWarningArgs(args: unknown[]): ProcessWarning {
|
||||
const warningArg = args[0];
|
||||
const secondArg = args[1];
|
||||
const thirdArg = args[2];
|
||||
let name: string | undefined;
|
||||
let code: string | undefined;
|
||||
let message: string | undefined;
|
||||
|
||||
if (warningArg instanceof Error) {
|
||||
name = warningArg.name;
|
||||
message = warningArg.message;
|
||||
code = (warningArg as Error & { code?: string }).code;
|
||||
} else if (typeof warningArg === "string") {
|
||||
message = warningArg;
|
||||
}
|
||||
|
||||
if (secondArg && typeof secondArg === "object" && !Array.isArray(secondArg)) {
|
||||
const options = secondArg as { type?: unknown; code?: unknown };
|
||||
if (typeof options.type === "string") {
|
||||
name = options.type;
|
||||
}
|
||||
if (typeof options.code === "string") {
|
||||
code = options.code;
|
||||
}
|
||||
} else {
|
||||
if (typeof secondArg === "string") {
|
||||
name = secondArg;
|
||||
}
|
||||
if (typeof thirdArg === "string") {
|
||||
code = thirdArg;
|
||||
}
|
||||
}
|
||||
|
||||
return { name, code, message };
|
||||
}
|
||||
|
||||
/**
 * Wraps `process.emitWarning` so known-noisy warnings are dropped.
 * Idempotent across the whole process: install state lives behind a global
 * symbol, so duplicate copies of this module install the filter only once.
 */
export function installProcessWarningFilter(): void {
  const state = resolveWarningFilterState();
  if (state.installed) {
    return;
  }

  const originalEmitWarning = process.emitWarning.bind(process);
  const wrappedEmitWarning: typeof process.emitWarning = ((...args: unknown[]) => {
    // Drop warnings that match the ignore list entirely.
    if (shouldIgnoreWarning(normalizeWarningArgs(args))) {
      return;
    }
    // Error + options-bag form: re-emit a copied Error carrying the
    // resolved name/code directly on the "warning" event.
    if (
      args[0] instanceof Error &&
      args[1] &&
      typeof args[1] === "object" &&
      !Array.isArray(args[1])
    ) {
      const warning = args[0];
      const emitted = Object.assign(new Error(warning.message), {
        name: warning.name,
        code: (warning as Error & { code?: string }).code,
      });
      process.emit("warning", emitted);
      return;
    }
    // Everything else passes through to the original implementation.
    Reflect.apply(originalEmitWarning, process, args);
  }) as typeof process.emitWarning;

  process.emitWarning = wrappedEmitWarning;
  state.installed = true;
}
|
||||
export { installProcessWarningFilter, shouldIgnoreWarning } from "./openclaw-runtime-io.js";
|
||||
export type { ProcessWarning } from "./openclaw-runtime-io.js";
|
||||
|
||||
@@ -1,285 +1,9 @@
|
||||
import { readFileSync, statSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import { normalizeLowercaseStringOrEmpty, normalizeOptionalString } from "./string-utils.js";
|
||||
|
||||
type WindowsSpawnResolution = "direct" | "node-entrypoint" | "exe-entrypoint" | "shell-fallback";
|
||||
|
||||
type WindowsSpawnCandidateResolution = Exclude<WindowsSpawnResolution, "shell-fallback">;
|
||||
|
||||
type WindowsSpawnProgramCandidate = {
|
||||
command: string;
|
||||
leadingArgv: string[];
|
||||
resolution: WindowsSpawnCandidateResolution | "unresolved-wrapper";
|
||||
windowsHide?: boolean;
|
||||
};
|
||||
|
||||
export type WindowsSpawnProgram = {
|
||||
command: string;
|
||||
leadingArgv: string[];
|
||||
resolution: WindowsSpawnResolution;
|
||||
shell?: boolean;
|
||||
windowsHide?: boolean;
|
||||
};
|
||||
|
||||
export type WindowsSpawnInvocation = {
|
||||
command: string;
|
||||
argv: string[];
|
||||
resolution: WindowsSpawnResolution;
|
||||
shell?: boolean;
|
||||
windowsHide?: boolean;
|
||||
};
|
||||
|
||||
export type ResolveWindowsSpawnProgramParams = {
|
||||
command: string;
|
||||
platform?: NodeJS.Platform;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
execPath?: string;
|
||||
packageName?: string;
|
||||
allowShellFallback?: boolean;
|
||||
};
|
||||
|
||||
function isFilePath(candidate: string): boolean {
|
||||
try {
|
||||
return statSync(candidate).isFile();
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Resolves a bare command name to a concrete file path using Windows
// PATH/PATHEXT lookup rules. Inputs that already carry a path separator are
// returned as-is, as is any command with no match on PATH.
function resolveWindowsExecutablePath(command: string, env: NodeJS.ProcessEnv): string {
  // Anything that already looks like a path is taken verbatim.
  if (command.includes("/") || command.includes("\\") || path.isAbsolute(command)) {
    return command;
  }

  // Prefer the caller-supplied env, then the real process env. Both PATH
  // and Path spellings are probed (Windows env names are case-insensitive).
  const pathValue = env.PATH ?? env.Path ?? process.env.PATH ?? process.env.Path ?? "";
  const pathEntries = pathValue
    .split(";")
    .map((entry) => entry.trim())
    .filter(Boolean);
  const hasExtension = path.extname(command).length > 0;
  const pathExtRaw =
    env.PATHEXT ??
    env.Pathext ??
    process.env.PATHEXT ??
    process.env.Pathext ??
    ".EXE;.CMD;.BAT;.COM";
  // When the command already has an extension, probe only the bare name;
  // otherwise probe each PATHEXT extension, normalized to a leading dot.
  const pathExt = hasExtension
    ? [""]
    : pathExtRaw
        .split(";")
        .map((ext) => ext.trim())
        .filter(Boolean)
        .map((ext) => (ext.startsWith(".") ? ext : `.${ext}`));

  for (const dir of pathEntries) {
    for (const ext of pathExt) {
      // Try the extension as configured plus lower- and upper-case
      // variants, in case the lookup runs on a case-sensitive filesystem.
      const normalizedExt = normalizeLowercaseStringOrEmpty(ext);
      const uppercaseExt = ext.toUpperCase();
      for (const candidateExt of [ext, normalizedExt, uppercaseExt]) {
        const candidate = path.join(dir, `${command}${candidateExt}`);
        if (isFilePath(candidate)) {
          return candidate;
        }
      }
    }
  }

  // No match on PATH: let the caller spawn the original name.
  return command;
}
|
||||
|
||||
// Parses an npm cmd shim (.cmd/.bat wrapper) and tries to recover the real
// script it forwards to: the first existing, quoted, %~dp0-relative path
// that is not the node binary itself. Returns null when nothing matches.
function resolveEntrypointFromCmdShim(wrapperPath: string): string | null {
  if (!isFilePath(wrapperPath)) {
    return null;
  }

  try {
    const content = readFileSync(wrapperPath, "utf8");
    const candidates: string[] = [];
    // Scan every double-quoted token in the shim script.
    for (const match of content.matchAll(/"([^"\r\n]*)"/g)) {
      const token = match[1] ?? "";
      // Keep only tokens anchored at %~dp0 (the wrapper's own directory).
      const relMatch = token.match(/%~?dp0%?\s*[\\/]*(.*)$/i);
      const relative = relMatch?.[1]?.trim();
      if (!relative) {
        continue;
      }
      // Normalize separators and strip leading slashes before resolving
      // against the wrapper's directory.
      const normalizedRelative = relative.replace(/[\\/]+/g, path.sep).replace(/^[\\/]+/, "");
      const candidate = path.resolve(path.dirname(wrapperPath), normalizedRelative);
      if (isFilePath(candidate)) {
        candidates.push(candidate);
      }
    }
    // Prefer the forwarded script over a bundled node binary reference.
    const nonNode = candidates.find((candidate) => {
      const base = normalizeLowercaseStringOrEmpty(path.basename(candidate));
      return base !== "node.exe" && base !== "node";
    });
    return nonNode ?? null;
  } catch {
    // Unreadable shim: treat as unresolvable.
    return null;
  }
}
|
||||
|
||||
function resolveBinEntry(
|
||||
packageName: string | undefined,
|
||||
binField: string | Record<string, string> | undefined,
|
||||
): string | null {
|
||||
if (typeof binField === "string") {
|
||||
return normalizeOptionalString(binField) ?? null;
|
||||
}
|
||||
if (!binField || typeof binField !== "object") {
|
||||
return null;
|
||||
}
|
||||
if (packageName) {
|
||||
const preferred = binField[packageName];
|
||||
const normalizedPreferred =
|
||||
typeof preferred === "string" ? normalizeOptionalString(preferred) : undefined;
|
||||
if (normalizedPreferred) {
|
||||
return normalizedPreferred;
|
||||
}
|
||||
}
|
||||
for (const value of Object.values(binField)) {
|
||||
const normalizedValue = typeof value === "string" ? normalizeOptionalString(value) : undefined;
|
||||
if (normalizedValue) {
|
||||
return normalizedValue;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// Falls back to package.json `bin` metadata to locate a wrapper's real
// entrypoint. Probes the sibling package dir (npm global bin layout) and a
// local node_modules dir; returns the first existing bin target, or null.
function resolveEntrypointFromPackageJson(
  wrapperPath: string,
  packageName?: string,
): string | null {
  if (!packageName) {
    return null;
  }

  const wrapperDir = path.dirname(wrapperPath);
  // npm's global layout places wrappers next to node_modules/<pkg>.
  const packageDirs = [
    path.resolve(wrapperDir, "..", packageName),
    path.resolve(wrapperDir, "node_modules", packageName),
  ];

  for (const packageDir of packageDirs) {
    const packageJsonPath = path.join(packageDir, "package.json");
    if (!isFilePath(packageJsonPath)) {
      continue;
    }
    try {
      const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf8")) as {
        bin?: string | Record<string, string>;
      };
      const entryRel = resolveBinEntry(packageName, packageJson.bin);
      if (!entryRel) {
        continue;
      }
      // bin entries are relative to the package directory.
      const entryPath = path.resolve(packageDir, entryRel);
      if (isFilePath(entryPath)) {
        return entryPath;
      }
    } catch {
      // Ignore malformed package metadata.
    }
  }

  return null;
}
|
||||
|
||||
// Classifies how a command should be spawned. Non-Windows platforms pass
// the command through untouched; on win32 the command is resolved via PATH
// and wrapper scripts (.cmd/.bat) are unwrapped to a direct node or exe
// entrypoint where possible. "unresolved-wrapper" marks wrappers that could
// not be unwrapped — the caller decides whether to fall back to a shell.
function resolveWindowsSpawnProgramCandidate(
  params: ResolveWindowsSpawnProgramParams,
): WindowsSpawnProgramCandidate {
  const platform = params.platform ?? process.platform;
  const env = params.env ?? process.env;
  const execPath = params.execPath ?? process.execPath;

  if (platform !== "win32") {
    return {
      command: params.command,
      leadingArgv: [],
      resolution: "direct",
    };
  }

  const resolvedCommand = resolveWindowsExecutablePath(params.command, env);
  const ext = normalizeLowercaseStringOrEmpty(path.extname(resolvedCommand));
  // Plain script files are run through the current node executable.
  if (ext === ".js" || ext === ".cjs" || ext === ".mjs") {
    return {
      command: execPath,
      leadingArgv: [resolvedCommand],
      resolution: "node-entrypoint",
      windowsHide: true,
    };
  }

  // Batch wrappers cannot be spawned without a shell, so try to unwrap
  // them to the script or exe they forward to.
  if (ext === ".cmd" || ext === ".bat") {
    const entrypoint =
      resolveEntrypointFromCmdShim(resolvedCommand) ??
      resolveEntrypointFromPackageJson(resolvedCommand, params.packageName);
    if (entrypoint) {
      const entryExt = normalizeLowercaseStringOrEmpty(path.extname(entrypoint));
      if (entryExt === ".exe") {
        return {
          command: entrypoint,
          leadingArgv: [],
          resolution: "exe-entrypoint",
          windowsHide: true,
        };
      }
      // Any non-exe entrypoint is assumed runnable by node.
      return {
        command: execPath,
        leadingArgv: [entrypoint],
        resolution: "node-entrypoint",
        windowsHide: true,
      };
    }

    return {
      command: resolvedCommand,
      leadingArgv: [],
      resolution: "unresolved-wrapper",
    };
  }

  return {
    command: resolvedCommand,
    leadingArgv: [],
    resolution: "direct",
  };
}
|
||||
|
||||
export function resolveWindowsSpawnProgram(
|
||||
params: ResolveWindowsSpawnProgramParams,
|
||||
): WindowsSpawnProgram {
|
||||
const candidate = resolveWindowsSpawnProgramCandidate(params);
|
||||
if (candidate.resolution !== "unresolved-wrapper") {
|
||||
return {
|
||||
command: candidate.command,
|
||||
leadingArgv: candidate.leadingArgv,
|
||||
resolution: candidate.resolution,
|
||||
windowsHide: candidate.windowsHide,
|
||||
};
|
||||
}
|
||||
if (params.allowShellFallback === true) {
|
||||
return {
|
||||
command: candidate.command,
|
||||
leadingArgv: [],
|
||||
resolution: "shell-fallback",
|
||||
shell: true,
|
||||
};
|
||||
}
|
||||
throw new Error(
|
||||
`${path.basename(candidate.command)} wrapper resolved, but no executable/Node entrypoint could be resolved without shell execution.`,
|
||||
);
|
||||
}
|
||||
|
||||
export function materializeWindowsSpawnProgram(
|
||||
program: WindowsSpawnProgram,
|
||||
argv: string[],
|
||||
): WindowsSpawnInvocation {
|
||||
return {
|
||||
command: program.command,
|
||||
argv: [...program.leadingArgv, ...argv],
|
||||
resolution: program.resolution,
|
||||
shell: program.shell,
|
||||
windowsHide: program.windowsHide,
|
||||
};
|
||||
}
|
||||
export {
|
||||
materializeWindowsSpawnProgram,
|
||||
resolveWindowsSpawnProgram,
|
||||
} from "./openclaw-runtime-io.js";
|
||||
export type {
|
||||
ResolveWindowsSpawnProgramParams,
|
||||
WindowsSpawnInvocation,
|
||||
WindowsSpawnProgram,
|
||||
} from "./openclaw-runtime-io.js";
|
||||
|
||||
@@ -1,460 +1 @@
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { resolveAgentWorkspaceDir } from "../../agents/agent-scope-config.js";
|
||||
import { parseDurationMs } from "../../cli/parse-duration.js";
|
||||
import type { SessionSendPolicyConfig } from "../../config/types.base.js";
|
||||
import type {
|
||||
MemoryBackend,
|
||||
MemoryCitationsMode,
|
||||
MemoryQmdConfig,
|
||||
MemoryQmdIndexPath,
|
||||
MemoryQmdMcporterConfig,
|
||||
MemoryQmdSearchMode,
|
||||
MemoryQmdStartupMode,
|
||||
} from "../../config/types.memory.js";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import { CANONICAL_ROOT_MEMORY_FILENAME } from "../../memory/root-memory-files.js";
|
||||
import { normalizeAgentId } from "../../routing/session-key.js";
|
||||
import {
|
||||
normalizeLowercaseStringOrEmpty,
|
||||
normalizeOptionalString,
|
||||
} from "../../shared/string-coerce.js";
|
||||
import { resolveUserPath } from "../../utils.js";
|
||||
import { splitShellArgs } from "../../utils/shell-argv.js";
|
||||
|
||||
export type ResolvedMemoryBackendConfig = {
|
||||
backend: MemoryBackend;
|
||||
citations: MemoryCitationsMode;
|
||||
qmd?: ResolvedQmdConfig;
|
||||
};
|
||||
|
||||
export type ResolvedQmdCollection = {
|
||||
name: string;
|
||||
path: string;
|
||||
pattern: string;
|
||||
kind: "memory" | "custom" | "sessions";
|
||||
};
|
||||
|
||||
export type ResolvedQmdUpdateConfig = {
|
||||
intervalMs: number;
|
||||
debounceMs: number;
|
||||
onBoot: boolean;
|
||||
startup: MemoryQmdStartupMode;
|
||||
startupDelayMs: number;
|
||||
waitForBootSync: boolean;
|
||||
embedIntervalMs: number;
|
||||
commandTimeoutMs: number;
|
||||
updateTimeoutMs: number;
|
||||
embedTimeoutMs: number;
|
||||
};
|
||||
|
||||
export type ResolvedQmdLimitsConfig = {
|
||||
maxResults: number;
|
||||
maxSnippetChars: number;
|
||||
maxInjectedChars: number;
|
||||
timeoutMs: number;
|
||||
};
|
||||
|
||||
export type ResolvedQmdSessionConfig = {
|
||||
enabled: boolean;
|
||||
exportDir?: string;
|
||||
retentionDays?: number;
|
||||
};
|
||||
|
||||
export type ResolvedQmdMcporterConfig = {
|
||||
enabled: boolean;
|
||||
serverName: string;
|
||||
startDaemon: boolean;
|
||||
};
|
||||
|
||||
export type ResolvedQmdConfig = {
|
||||
command: string;
|
||||
mcporter: ResolvedQmdMcporterConfig;
|
||||
searchMode: MemoryQmdSearchMode;
|
||||
searchTool?: string;
|
||||
collections: ResolvedQmdCollection[];
|
||||
sessions: ResolvedQmdSessionConfig;
|
||||
update: ResolvedQmdUpdateConfig;
|
||||
limits: ResolvedQmdLimitsConfig;
|
||||
includeDefaultMemory: boolean;
|
||||
scope?: SessionSendPolicyConfig;
|
||||
};
|
||||
|
||||
const DEFAULT_BACKEND: MemoryBackend = "builtin";
|
||||
const DEFAULT_CITATIONS: MemoryCitationsMode = "auto";
|
||||
const DEFAULT_QMD_INTERVAL = "5m";
|
||||
const DEFAULT_QMD_DEBOUNCE_MS = 15_000;
|
||||
const DEFAULT_QMD_TIMEOUT_MS = 4_000;
|
||||
// Defaulting to `query` can be extremely slow on CPU-only systems (query expansion + rerank).
|
||||
// Prefer a faster mode for interactive use; users can opt into `query` for best recall.
|
||||
const DEFAULT_QMD_SEARCH_MODE: MemoryQmdSearchMode = "search";
|
||||
const DEFAULT_QMD_STARTUP: MemoryQmdStartupMode = "off";
|
||||
const DEFAULT_QMD_STARTUP_DELAY_MS = 120_000;
|
||||
const DEFAULT_QMD_EMBED_INTERVAL = "60m";
|
||||
const DEFAULT_QMD_COMMAND_TIMEOUT_MS = 30_000;
|
||||
const DEFAULT_QMD_UPDATE_TIMEOUT_MS = 120_000;
|
||||
const DEFAULT_QMD_EMBED_TIMEOUT_MS = 120_000;
|
||||
const DEFAULT_QMD_LIMITS: ResolvedQmdLimitsConfig = {
|
||||
maxResults: 4,
|
||||
maxSnippetChars: 450,
|
||||
maxInjectedChars: 2_200,
|
||||
timeoutMs: DEFAULT_QMD_TIMEOUT_MS,
|
||||
};
|
||||
const DEFAULT_QMD_MCPORTER: ResolvedQmdMcporterConfig = {
|
||||
enabled: false,
|
||||
serverName: "qmd",
|
||||
startDaemon: true,
|
||||
};
|
||||
|
||||
const DEFAULT_QMD_SCOPE: SessionSendPolicyConfig = {
|
||||
default: "deny",
|
||||
rules: [
|
||||
{
|
||||
action: "allow",
|
||||
match: { chatType: "direct" },
|
||||
},
|
||||
{
|
||||
action: "allow",
|
||||
match: { chatType: "channel" },
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
function sanitizeName(input: string): string {
|
||||
const lower = normalizeLowercaseStringOrEmpty(input).replace(/[^a-z0-9-]+/g, "-");
|
||||
const trimmed = lower.replace(/^-+|-+$/g, "");
|
||||
return trimmed || "collection";
|
||||
}
|
||||
|
||||
function scopeCollectionBase(base: string, agentId: string): string {
|
||||
return `${base}-${sanitizeName(agentId)}`;
|
||||
}
|
||||
|
||||
// Canonicalizes a path for containment checks: resolves symlinks via the
// deepest existing ancestor, then re-appends the non-existing tail. This
// keeps comparisons stable even when the path does not exist yet.
function canonicalizePathForContainment(rawPath: string): string {
  const resolved = path.resolve(rawPath);
  let current = resolved;
  const suffix: string[] = [];
  while (true) {
    try {
      const canonical = path.normalize(fs.realpathSync.native(current));
      return path.normalize(path.join(canonical, ...suffix));
    } catch {
      // `current` does not exist (or is unreadable): move one level up and
      // remember the stripped segment so it can be re-attached afterwards.
      const parent = path.dirname(current);
      if (parent === current) {
        // Reached the filesystem root without finding an existing ancestor.
        return path.normalize(resolved);
      }
      suffix.unshift(path.basename(current));
      current = parent;
    }
  }
}
|
||||
|
||||
function isPathInsideRoot(candidatePath: string, rootPath: string): boolean {
|
||||
const relative = path.relative(
|
||||
canonicalizePathForContainment(rootPath),
|
||||
canonicalizePathForContainment(candidatePath),
|
||||
);
|
||||
return relative === "" || (!relative.startsWith("..") && !path.isAbsolute(relative));
|
||||
}
|
||||
|
||||
function ensureUniqueName(base: string, existing: Set<string>): string {
|
||||
let name = sanitizeName(base);
|
||||
if (!existing.has(name)) {
|
||||
existing.add(name);
|
||||
return name;
|
||||
}
|
||||
let suffix = 2;
|
||||
while (existing.has(`${name}-${suffix}`)) {
|
||||
suffix += 1;
|
||||
}
|
||||
const unique = `${name}-${suffix}`;
|
||||
existing.add(unique);
|
||||
return unique;
|
||||
}
|
||||
|
||||
function resolvePath(raw: string, workspaceDir: string): string {
|
||||
const trimmed = raw.trim();
|
||||
if (!trimmed) {
|
||||
throw new Error("path required");
|
||||
}
|
||||
if (trimmed.startsWith("~") || path.isAbsolute(trimmed)) {
|
||||
return path.normalize(resolveUserPath(trimmed));
|
||||
}
|
||||
return path.normalize(path.resolve(workspaceDir, trimmed));
|
||||
}
|
||||
|
||||
function resolveIntervalMs(raw: string | undefined): number {
|
||||
const value = raw?.trim();
|
||||
if (!value) {
|
||||
return parseDurationMs(DEFAULT_QMD_INTERVAL, { defaultUnit: "m" });
|
||||
}
|
||||
try {
|
||||
return parseDurationMs(value, { defaultUnit: "m" });
|
||||
} catch {
|
||||
return parseDurationMs(DEFAULT_QMD_INTERVAL, { defaultUnit: "m" });
|
||||
}
|
||||
}
|
||||
|
||||
function resolveEmbedIntervalMs(raw: string | undefined): number {
|
||||
const value = raw?.trim();
|
||||
if (!value) {
|
||||
return parseDurationMs(DEFAULT_QMD_EMBED_INTERVAL, { defaultUnit: "m" });
|
||||
}
|
||||
try {
|
||||
return parseDurationMs(value, { defaultUnit: "m" });
|
||||
} catch {
|
||||
return parseDurationMs(DEFAULT_QMD_EMBED_INTERVAL, { defaultUnit: "m" });
|
||||
}
|
||||
}
|
||||
|
||||
function resolveDebounceMs(raw: number | undefined): number {
|
||||
if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
|
||||
return Math.floor(raw);
|
||||
}
|
||||
return DEFAULT_QMD_DEBOUNCE_MS;
|
||||
}
|
||||
|
||||
function resolveTimeoutMs(raw: number | undefined, fallback: number): number {
|
||||
if (typeof raw === "number" && Number.isFinite(raw) && raw > 0) {
|
||||
return Math.floor(raw);
|
||||
}
|
||||
return fallback;
|
||||
}
|
||||
|
||||
function resolveStartupMode(raw: MemoryQmdConfig["update"]): MemoryQmdStartupMode {
|
||||
const value = raw?.startup;
|
||||
if (value === "idle" || value === "immediate" || value === "off") {
|
||||
return value;
|
||||
}
|
||||
return DEFAULT_QMD_STARTUP;
|
||||
}
|
||||
|
||||
function resolveStartupDelayMs(raw: number | undefined): number {
|
||||
if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
|
||||
return Math.floor(raw);
|
||||
}
|
||||
return DEFAULT_QMD_STARTUP_DELAY_MS;
|
||||
}
|
||||
|
||||
function resolveLimits(raw?: MemoryQmdConfig["limits"]): ResolvedQmdLimitsConfig {
|
||||
const parsed: ResolvedQmdLimitsConfig = { ...DEFAULT_QMD_LIMITS };
|
||||
if (raw?.maxResults && raw.maxResults > 0) {
|
||||
parsed.maxResults = Math.floor(raw.maxResults);
|
||||
}
|
||||
if (raw?.maxSnippetChars && raw.maxSnippetChars > 0) {
|
||||
parsed.maxSnippetChars = Math.floor(raw.maxSnippetChars);
|
||||
}
|
||||
if (raw?.maxInjectedChars && raw.maxInjectedChars > 0) {
|
||||
parsed.maxInjectedChars = Math.floor(raw.maxInjectedChars);
|
||||
}
|
||||
if (raw?.timeoutMs && raw.timeoutMs > 0) {
|
||||
parsed.timeoutMs = Math.floor(raw.timeoutMs);
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
||||
function resolveSearchMode(raw?: MemoryQmdConfig["searchMode"]): MemoryQmdSearchMode {
|
||||
if (raw === "search" || raw === "vsearch" || raw === "query") {
|
||||
return raw;
|
||||
}
|
||||
return DEFAULT_QMD_SEARCH_MODE;
|
||||
}
|
||||
|
||||
function resolveSearchTool(raw?: MemoryQmdConfig["searchTool"]): string | undefined {
|
||||
const value = raw?.trim();
|
||||
return value ? value : undefined;
|
||||
}
|
||||
|
||||
function resolveSessionConfig(
|
||||
cfg: MemoryQmdConfig["sessions"],
|
||||
workspaceDir: string,
|
||||
): ResolvedQmdSessionConfig {
|
||||
const enabled = Boolean(cfg?.enabled);
|
||||
const exportDirRaw = cfg?.exportDir?.trim();
|
||||
const exportDir = exportDirRaw ? resolvePath(exportDirRaw, workspaceDir) : undefined;
|
||||
const retentionDays =
|
||||
cfg?.retentionDays && cfg.retentionDays > 0 ? Math.floor(cfg.retentionDays) : undefined;
|
||||
return {
|
||||
enabled,
|
||||
exportDir,
|
||||
retentionDays,
|
||||
};
|
||||
}
|
||||
|
||||
// Converts raw user-configured index paths into resolved custom collections.
// Invalid or blank entries are skipped, duplicate (path, pattern) pairs are
// dropped, and collection names are agent-scoped unless an entry carries an
// explicit name and points outside the workspace.
function resolveCustomPaths(
  rawPaths: MemoryQmdIndexPath[] | undefined,
  workspaceDir: string,
  existing: Set<string>,
  agentId: string,
): ResolvedQmdCollection[] {
  if (!rawPaths?.length) {
    return [];
  }
  const collections: ResolvedQmdCollection[] = [];
  const seenRoots = new Set<string>();
  rawPaths.forEach((entry, index) => {
    const trimmedPath = normalizeOptionalString(entry?.path);
    if (!trimmedPath) {
      return;
    }
    let resolved: string;
    try {
      resolved = resolvePath(trimmedPath, workspaceDir);
    } catch {
      // Unresolvable path: skip the entry entirely.
      return;
    }
    const pattern = normalizeOptionalString(entry.pattern) || "**/*.md";
    // Dedupe on the resolved path + pattern pair (NUL is a safe separator).
    const dedupeKey = `${resolved}\u0000${pattern}`;
    if (seenRoots.has(dedupeKey)) {
      return;
    }
    seenRoots.add(dedupeKey);
    const explicitName = entry.name?.trim();
    // Paths outside the workspace may keep their explicit (shared) name;
    // everything else gets an agent-scoped name to avoid collisions.
    const baseName =
      explicitName && !isPathInsideRoot(resolved, workspaceDir)
        ? explicitName
        : scopeCollectionBase(explicitName || `custom-${index + 1}`, agentId);
    const name = ensureUniqueName(baseName, existing);
    collections.push({
      name,
      path: resolved,
      pattern,
      kind: "custom",
    });
  });
  return collections;
}
|
||||
|
||||
function resolveMcporterConfig(raw?: MemoryQmdMcporterConfig): ResolvedQmdMcporterConfig {
|
||||
const parsed: ResolvedQmdMcporterConfig = { ...DEFAULT_QMD_MCPORTER };
|
||||
if (!raw) {
|
||||
return parsed;
|
||||
}
|
||||
if (raw.enabled !== undefined) {
|
||||
parsed.enabled = raw.enabled;
|
||||
}
|
||||
if (typeof raw.serverName === "string" && raw.serverName.trim()) {
|
||||
parsed.serverName = raw.serverName.trim();
|
||||
}
|
||||
if (raw.startDaemon !== undefined) {
|
||||
parsed.startDaemon = raw.startDaemon;
|
||||
}
|
||||
// When enabled, default startDaemon to true.
|
||||
if (parsed.enabled && raw.startDaemon === undefined) {
|
||||
parsed.startDaemon = true;
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
||||
function resolveDefaultCollections(
|
||||
include: boolean,
|
||||
workspaceDir: string,
|
||||
existing: Set<string>,
|
||||
agentId: string,
|
||||
): ResolvedQmdCollection[] {
|
||||
if (!include) {
|
||||
return [];
|
||||
}
|
||||
const entries: Array<{ path: string; pattern: string; base: string }> = [
|
||||
{ path: workspaceDir, pattern: CANONICAL_ROOT_MEMORY_FILENAME, base: "memory-root" },
|
||||
{ path: path.join(workspaceDir, "memory"), pattern: "**/*.md", base: "memory-dir" },
|
||||
];
|
||||
return entries.map((entry) => ({
|
||||
name: ensureUniqueName(scopeCollectionBase(entry.base, agentId), existing),
|
||||
path: entry.path,
|
||||
pattern: entry.pattern,
|
||||
kind: "memory",
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Resolve the effective memory backend configuration for one agent.
 *
 * Returns a builtin-backend result unless `memory.backend` is "qmd"; for qmd
 * it assembles the full resolved config: collections (built-in memory plus
 * custom paths merged from `memory.qmd.paths`, agent-default/per-agent
 * `memorySearch.extraPaths`, and per-agent `extraCollections`), the qmd
 * command, mcporter settings, session config, update cadence/timeouts,
 * limits, and scope.
 */
export function resolveMemoryBackendConfig(params: {
  cfg: OpenClawConfig;
  agentId: string;
}): ResolvedMemoryBackendConfig {
  const normalizedAgentId = normalizeAgentId(params.agentId);
  const backend = params.cfg.memory?.backend ?? DEFAULT_BACKEND;
  const citations = params.cfg.memory?.citations ?? DEFAULT_CITATIONS;
  if (backend !== "qmd") {
    // Anything other than "qmd" (including the default) uses the builtin backend.
    return { backend: "builtin", citations };
  }

  const workspaceDir = resolveAgentWorkspaceDir(params.cfg, normalizedAgentId);
  const qmdCfg = params.cfg.memory?.qmd;
  const includeDefaultMemory = qmdCfg?.includeDefaultMemory !== false;
  // Shared across both collection resolvers so names stay unique overall.
  const nameSet = new Set<string>();
  const agentEntry = params.cfg.agents?.list?.find(
    (entry) => normalizeAgentId(entry?.id) === normalizedAgentId,
  );
  // extraPaths: agent defaults first, then per-agent entries; non-strings and
  // blank values are dropped, then duplicates removed (order-preserving).
  const mergedExtraPaths = [
    ...(params.cfg.agents?.defaults?.memorySearch?.extraPaths ?? []),
    ...(agentEntry?.memorySearch?.extraPaths ?? []),
  ]
    .filter((value): value is string => typeof value === "string")
    .map((value) => value.trim())
    .filter(Boolean);
  const dedupedExtraPaths = Array.from(new Set(mergedExtraPaths));
  const searchExtraPaths = dedupedExtraPaths.map(
    (pathValue): { path: string; pattern?: string; name?: string } => ({ path: pathValue }),
  );
  // extraCollections must be objects with a string `path`; others are dropped.
  const mergedExtraCollections = [
    ...(params.cfg.agents?.defaults?.memorySearch?.qmd?.extraCollections ?? []),
    ...(agentEntry?.memorySearch?.qmd?.extraCollections ?? []),
  ].filter(
    (value): value is MemoryQmdIndexPath =>
      value !== null && typeof value === "object" && typeof value.path === "string",
  );

  // Combine QMD-specific paths with extraPaths and per-agent cross-agent collections.
  const allQmdPaths: MemoryQmdIndexPath[] = [
    ...(qmdCfg?.paths ?? []),
    ...searchExtraPaths,
    ...mergedExtraCollections,
  ];

  // Default collections are resolved first so custom names are uniquified
  // against them via the shared nameSet.
  const collections = [
    ...resolveDefaultCollections(includeDefaultMemory, workspaceDir, nameSet, normalizedAgentId),
    ...resolveCustomPaths(allQmdPaths, workspaceDir, nameSet, normalizedAgentId),
  ];

  // The configured command may contain arguments; only the program name is
  // kept (shell-style split first, whitespace split as fallback).
  const rawCommand = normalizeOptionalString(qmdCfg?.command) || "qmd";
  const parsedCommand = splitShellArgs(rawCommand);
  const command = parsedCommand?.[0] || rawCommand.split(/\s+/)[0] || "qmd";
  const resolved: ResolvedQmdConfig = {
    command,
    mcporter: resolveMcporterConfig(qmdCfg?.mcporter),
    searchMode: resolveSearchMode(qmdCfg?.searchMode),
    searchTool: resolveSearchTool(qmdCfg?.searchTool),
    collections,
    includeDefaultMemory,
    sessions: resolveSessionConfig(qmdCfg?.sessions, workspaceDir),
    update: {
      intervalMs: resolveIntervalMs(qmdCfg?.update?.interval),
      debounceMs: resolveDebounceMs(qmdCfg?.update?.debounceMs),
      // onBoot defaults to true; only an explicit `false` disables it.
      onBoot: qmdCfg?.update?.onBoot !== false,
      startup: resolveStartupMode(qmdCfg?.update),
      startupDelayMs: resolveStartupDelayMs(qmdCfg?.update?.startupDelayMs),
      // waitForBootSync defaults to false; only an explicit `true` enables it.
      waitForBootSync: qmdCfg?.update?.waitForBootSync === true,
      embedIntervalMs: resolveEmbedIntervalMs(qmdCfg?.update?.embedInterval),
      commandTimeoutMs: resolveTimeoutMs(
        qmdCfg?.update?.commandTimeoutMs,
        DEFAULT_QMD_COMMAND_TIMEOUT_MS,
      ),
      updateTimeoutMs: resolveTimeoutMs(
        qmdCfg?.update?.updateTimeoutMs,
        DEFAULT_QMD_UPDATE_TIMEOUT_MS,
      ),
      embedTimeoutMs: resolveTimeoutMs(
        qmdCfg?.update?.embedTimeoutMs,
        DEFAULT_QMD_EMBED_TIMEOUT_MS,
      ),
    },
    limits: resolveLimits(qmdCfg?.limits),
    scope: qmdCfg?.scope ?? DEFAULT_QMD_SCOPE,
  };

  return {
    backend: "qmd",
    citations,
    qmd: resolved,
  };
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/backend-config.js";
|
||||
|
||||
@@ -1,55 +1 @@
|
||||
/**
 * Shape of one parsed JSONL line from a provider's embedding batch output
 * file. All fields are optional because provider output is untrusted input.
 * `response.body` may be either a structured object (embedding data or a
 * nested error) or a raw string body.
 */
export type EmbeddingBatchOutputLine = {
  // Correlates this line with the request that produced it.
  custom_id?: string;
  // Top-level (request-level) error, if any.
  error?: { message?: string };
  response?: {
    status_code?: number;
    body?:
      | {
          data?: Array<{
            embedding?: number[];
          }>;
          error?: { message?: string };
        }
      | string;
  };
};
|
||||
|
||||
export function applyEmbeddingBatchOutputLine(params: {
|
||||
line: EmbeddingBatchOutputLine;
|
||||
remaining: Set<string>;
|
||||
errors: string[];
|
||||
byCustomId: Map<string, number[]>;
|
||||
}) {
|
||||
const customId = params.line.custom_id;
|
||||
if (!customId) {
|
||||
return;
|
||||
}
|
||||
params.remaining.delete(customId);
|
||||
|
||||
const errorMessage = params.line.error?.message;
|
||||
if (errorMessage) {
|
||||
params.errors.push(`${customId}: ${errorMessage}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const response = params.line.response;
|
||||
const statusCode = response?.status_code ?? 0;
|
||||
if (statusCode >= 400) {
|
||||
const messageFromObject =
|
||||
response?.body && typeof response.body === "object"
|
||||
? (response.body as { error?: { message?: string } }).error?.message
|
||||
: undefined;
|
||||
const messageFromString = typeof response?.body === "string" ? response.body : undefined;
|
||||
params.errors.push(`${customId}: ${messageFromObject ?? messageFromString ?? "unknown error"}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const data =
|
||||
response?.body && typeof response.body === "object" ? (response.body.data ?? []) : [];
|
||||
const embedding = data[0]?.embedding ?? [];
|
||||
if (embedding.length === 0) {
|
||||
params.errors.push(`${customId}: empty embedding`);
|
||||
return;
|
||||
}
|
||||
params.byCustomId.set(customId, embedding);
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/batch-output.js";
|
||||
|
||||
@@ -1,69 +1 @@
|
||||
// Batch states that will never progress to completion. Both spellings of
// "cancelled"/"canceled" are accepted since providers differ.
const TERMINAL_FAILURE_STATES = new Set(["failed", "expired", "cancelled", "canceled"]);

// Minimal view of a provider batch-status response; file ids may be absent
// or explicitly null depending on the provider payload.
type BatchStatusLike = {
  id?: string;
  status?: string;
  output_file_id?: string | null;
  error_file_id?: string | null;
};

// Result of a successfully completed batch: the output file is required,
// the error file is optional.
export type BatchCompletionResult = {
  outputFileId: string;
  errorFileId?: string;
};
|
||||
|
||||
export function resolveBatchCompletionFromStatus(params: {
|
||||
provider: string;
|
||||
batchId: string;
|
||||
status: BatchStatusLike;
|
||||
}): BatchCompletionResult {
|
||||
if (!params.status.output_file_id) {
|
||||
throw new Error(`${params.provider} batch ${params.batchId} completed without output file`);
|
||||
}
|
||||
return {
|
||||
outputFileId: params.status.output_file_id,
|
||||
errorFileId: params.status.error_file_id ?? undefined,
|
||||
};
|
||||
}
|
||||
|
||||
export async function throwIfBatchTerminalFailure(params: {
|
||||
provider: string;
|
||||
status: BatchStatusLike;
|
||||
readError: (errorFileId: string) => Promise<string | undefined>;
|
||||
}): Promise<void> {
|
||||
const state = params.status.status ?? "unknown";
|
||||
if (!TERMINAL_FAILURE_STATES.has(state)) {
|
||||
return;
|
||||
}
|
||||
const detail = params.status.error_file_id
|
||||
? await params.readError(params.status.error_file_id)
|
||||
: undefined;
|
||||
const suffix = detail ? `: ${detail}` : "";
|
||||
throw new Error(`${params.provider} batch ${params.status.id ?? "<unknown>"} ${state}${suffix}`);
|
||||
}
|
||||
|
||||
export async function resolveCompletedBatchResult(params: {
|
||||
provider: string;
|
||||
status: BatchStatusLike;
|
||||
wait: boolean;
|
||||
waitForBatch: () => Promise<BatchCompletionResult>;
|
||||
}): Promise<BatchCompletionResult> {
|
||||
const batchId = params.status.id ?? "<unknown>";
|
||||
if (!params.wait && params.status.status !== "completed") {
|
||||
throw new Error(
|
||||
`${params.provider} batch ${batchId} submitted; enable remote.batch.wait to await completion`,
|
||||
);
|
||||
}
|
||||
const completed =
|
||||
params.status.status === "completed"
|
||||
? resolveBatchCompletionFromStatus({
|
||||
provider: params.provider,
|
||||
batchId,
|
||||
status: params.status,
|
||||
})
|
||||
: await params.waitForBatch();
|
||||
if (!completed.outputFileId) {
|
||||
throw new Error(`${params.provider} batch ${batchId} completed without output file`);
|
||||
}
|
||||
return completed;
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/batch-status.js";
|
||||
|
||||
@@ -1,85 +1 @@
|
||||
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||
|
||||
// Helpers for enforcing embedding model input size limits.
|
||||
//
|
||||
// We use UTF-8 byte length as a conservative upper bound for tokenizer output.
|
||||
// Tokenizers operate over bytes; a token must contain at least one byte, so
|
||||
// token_count <= utf8_byte_length.
|
||||
|
||||
export function estimateUtf8Bytes(text: string): number {
|
||||
if (!text) {
|
||||
return 0;
|
||||
}
|
||||
return Buffer.byteLength(text, "utf8");
|
||||
}
|
||||
|
||||
export function estimateStructuredEmbeddingInputBytes(input: EmbeddingInput): number {
|
||||
if (!input.parts?.length) {
|
||||
return estimateUtf8Bytes(input.text);
|
||||
}
|
||||
let total = 0;
|
||||
for (const part of input.parts) {
|
||||
if (part.type === "text") {
|
||||
total += estimateUtf8Bytes(part.text);
|
||||
continue;
|
||||
}
|
||||
total += estimateUtf8Bytes(part.mimeType);
|
||||
total += estimateUtf8Bytes(part.data);
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
export function splitTextToUtf8ByteLimit(text: string, maxUtf8Bytes: number): string[] {
|
||||
if (maxUtf8Bytes <= 0) {
|
||||
return [text];
|
||||
}
|
||||
if (estimateUtf8Bytes(text) <= maxUtf8Bytes) {
|
||||
return [text];
|
||||
}
|
||||
|
||||
const parts: string[] = [];
|
||||
let cursor = 0;
|
||||
while (cursor < text.length) {
|
||||
// The number of UTF-16 code units is always <= the number of UTF-8 bytes.
|
||||
// This makes `cursor + maxUtf8Bytes` a safe upper bound on the next split point.
|
||||
let low = cursor + 1;
|
||||
let high = Math.min(text.length, cursor + maxUtf8Bytes);
|
||||
let best = cursor;
|
||||
|
||||
while (low <= high) {
|
||||
const mid = Math.floor((low + high) / 2);
|
||||
const bytes = estimateUtf8Bytes(text.slice(cursor, mid));
|
||||
if (bytes <= maxUtf8Bytes) {
|
||||
best = mid;
|
||||
low = mid + 1;
|
||||
} else {
|
||||
high = mid - 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (best <= cursor) {
|
||||
best = Math.min(text.length, cursor + 1);
|
||||
}
|
||||
|
||||
// Avoid splitting inside a surrogate pair.
|
||||
if (
|
||||
best < text.length &&
|
||||
best > cursor &&
|
||||
text.charCodeAt(best - 1) >= 0xd800 &&
|
||||
text.charCodeAt(best - 1) <= 0xdbff &&
|
||||
text.charCodeAt(best) >= 0xdc00 &&
|
||||
text.charCodeAt(best) <= 0xdfff
|
||||
) {
|
||||
best -= 1;
|
||||
}
|
||||
|
||||
const part = text.slice(cursor, best);
|
||||
if (!part) {
|
||||
break;
|
||||
}
|
||||
parts.push(part);
|
||||
cursor = best;
|
||||
}
|
||||
|
||||
return parts;
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/embedding-input-limits.js";
|
||||
|
||||
@@ -1,65 +1 @@
|
||||
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
|
||||
import {
|
||||
resolveRemoteEmbeddingBearerClient,
|
||||
type RemoteEmbeddingProviderId,
|
||||
} from "./embeddings-remote-client.js";
|
||||
import { fetchRemoteEmbeddingVectors } from "./embeddings-remote-fetch.js";
|
||||
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.types.js";
|
||||
|
||||
/**
 * Connection details for a remote embeddings endpoint. `headers` carries
 * authentication; `ssrfPolicy` and `fetchImpl` are forwarded to the fetch
 * layer when set (fetchImpl overrides the global fetch, e.g. in tests).
 */
export type RemoteEmbeddingClient = {
  baseUrl: string;
  headers: Record<string, string>;
  ssrfPolicy?: SsrFPolicy;
  fetchImpl?: typeof fetch;
  model: string;
};
|
||||
|
||||
export function createRemoteEmbeddingProvider(params: {
|
||||
id: string;
|
||||
client: RemoteEmbeddingClient;
|
||||
errorPrefix: string;
|
||||
maxInputTokens?: number;
|
||||
}): EmbeddingProvider {
|
||||
const { client } = params;
|
||||
const url = `${client.baseUrl.replace(/\/$/, "")}/embeddings`;
|
||||
|
||||
const embed = async (input: string[]): Promise<number[][]> => {
|
||||
if (input.length === 0) {
|
||||
return [];
|
||||
}
|
||||
return await fetchRemoteEmbeddingVectors({
|
||||
url,
|
||||
headers: client.headers,
|
||||
ssrfPolicy: client.ssrfPolicy,
|
||||
fetchImpl: client.fetchImpl,
|
||||
body: { model: client.model, input },
|
||||
errorPrefix: params.errorPrefix,
|
||||
});
|
||||
};
|
||||
|
||||
return {
|
||||
id: params.id,
|
||||
model: client.model,
|
||||
...(typeof params.maxInputTokens === "number" ? { maxInputTokens: params.maxInputTokens } : {}),
|
||||
embedQuery: async (text) => {
|
||||
const [vec] = await embed([text]);
|
||||
return vec ?? [];
|
||||
},
|
||||
embedBatch: embed,
|
||||
};
|
||||
}
|
||||
|
||||
export async function resolveRemoteEmbeddingClient(params: {
|
||||
provider: RemoteEmbeddingProviderId;
|
||||
options: EmbeddingProviderOptions;
|
||||
defaultBaseUrl: string;
|
||||
normalizeModel: (model: string) => string;
|
||||
}): Promise<RemoteEmbeddingClient> {
|
||||
const { baseUrl, headers, ssrfPolicy } = await resolveRemoteEmbeddingBearerClient({
|
||||
provider: params.provider,
|
||||
options: params.options,
|
||||
defaultBaseUrl: params.defaultBaseUrl,
|
||||
});
|
||||
const model = params.normalizeModel(params.options.model);
|
||||
return { baseUrl, headers, ssrfPolicy, model };
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/embeddings-remote-provider.js";
|
||||
|
||||
@@ -1,525 +1 @@
|
||||
import crypto from "node:crypto";
|
||||
import fsSync from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { detectMime } from "../../media/mime.js";
|
||||
import {
|
||||
CANONICAL_ROOT_MEMORY_FILENAME,
|
||||
resolveCanonicalRootMemoryFile,
|
||||
shouldSkipRootMemoryAuxiliaryPath,
|
||||
} from "../../memory/root-memory-files.js";
|
||||
import { CHARS_PER_TOKEN_ESTIMATE, estimateStringChars } from "../../utils/cjk-chars.js";
|
||||
import { runTasksWithConcurrency } from "../../utils/run-with-concurrency.js";
|
||||
import { estimateStructuredEmbeddingInputBytes } from "./embedding-input-limits.js";
|
||||
import { buildTextEmbeddingInput, type EmbeddingInput } from "./embedding-inputs.js";
|
||||
import { isFileMissingError } from "./fs-utils.js";
|
||||
import {
|
||||
buildMemoryMultimodalLabel,
|
||||
classifyMemoryMultimodalPath,
|
||||
type MemoryMultimodalModality,
|
||||
type MemoryMultimodalSettings,
|
||||
} from "./multimodal.js";
|
||||
|
||||
export { hashText } from "./hash.js";
|
||||
import { hashText } from "./hash.js";
|
||||
|
||||
/**
 * One indexed memory file. Markdown entries carry only path/stat/hash;
 * multimodal entries additionally carry the content label, modality, mime
 * type, and a sha256 of the raw bytes (`dataHash`).
 */
export type MemoryFileEntry = {
  // Workspace-relative path with forward slashes.
  path: string;
  absPath: string;
  mtimeMs: number;
  size: number;
  // For markdown: hash of the file text; for multimodal: hash of a JSON
  // descriptor including the dataHash (see buildFileEntry).
  hash: string;
  dataHash?: string;
  kind?: "markdown" | "multimodal";
  contentText?: string;
  modality?: MemoryMultimodalModality;
  mimeType?: string;
};

/** A contiguous slice of a file prepared for embedding. Lines are 1-indexed. */
export type MemoryChunk = {
  startLine: number;
  endLine: number;
  text: string;
  hash: string;
  embeddingInput?: EmbeddingInput;
};

/** A multimodal chunk plus the estimated size of its structured input. */
export type MultimodalMemoryChunk = {
  chunk: MemoryChunk;
  structuredInputBytes: number;
};

// Sentinel used when no multimodal settings are supplied: nothing is
// classified as multimodal (no modalities, zero byte budget).
const DISABLED_MULTIMODAL_SETTINGS: MemoryMultimodalSettings = {
  enabled: false,
  modalities: [],
  maxFileBytes: 0,
};
|
||||
|
||||
export function ensureDir(dir: string): string {
|
||||
try {
|
||||
fsSync.mkdirSync(dir, { recursive: true });
|
||||
} catch {}
|
||||
return dir;
|
||||
}
|
||||
|
||||
export function normalizeRelPath(value: string): string {
|
||||
const trimmed = value.trim().replace(/^[./]+/, "");
|
||||
return trimmed.replace(/\\/g, "/");
|
||||
}
|
||||
|
||||
export function normalizeExtraMemoryPaths(workspaceDir: string, extraPaths?: string[]): string[] {
|
||||
if (!extraPaths?.length) {
|
||||
return [];
|
||||
}
|
||||
const resolved = extraPaths
|
||||
.map((value) => value.trim())
|
||||
.filter(Boolean)
|
||||
.map((value) =>
|
||||
path.isAbsolute(value) ? path.resolve(value) : path.resolve(workspaceDir, value),
|
||||
);
|
||||
return Array.from(new Set(resolved));
|
||||
}
|
||||
|
||||
export function isMemoryPath(relPath: string): boolean {
|
||||
const normalized = normalizeRelPath(relPath);
|
||||
if (!normalized) {
|
||||
return false;
|
||||
}
|
||||
if (normalized === CANONICAL_ROOT_MEMORY_FILENAME || normalized === "DREAMS.md") {
|
||||
return true;
|
||||
}
|
||||
return normalized.startsWith("memory/");
|
||||
}
|
||||
|
||||
function isAllowedMemoryFilePath(filePath: string, multimodal?: MemoryMultimodalSettings): boolean {
|
||||
if (filePath.endsWith(".md")) {
|
||||
return true;
|
||||
}
|
||||
return (
|
||||
classifyMemoryMultimodalPath(filePath, multimodal ?? DISABLED_MULTIMODAL_SETTINGS) !== null
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Recursively collect indexable memory files under `dir` into `files`.
 *
 * Skips: paths rejected by `shouldSkipPath`, symlinks (never followed),
 * ".openclaw-repair" directories, non-regular files, and files that are
 * neither markdown nor an allowed multimodal type. Mutates `files` in place.
 */
async function walkDir(
  dir: string,
  files: string[],
  multimodal?: MemoryMultimodalSettings,
  shouldSkipPath?: (absPath: string) => boolean,
) {
  const entries = await fs.readdir(dir, { withFileTypes: true });
  for (const entry of entries) {
    const full = path.join(dir, entry.name);
    // Caller-provided exclusion runs first so skipped subtrees are not stat'd.
    if (shouldSkipPath?.(full)) {
      continue;
    }
    // Symlinks are never followed (avoids cycles and out-of-tree reads).
    if (entry.isSymbolicLink()) {
      continue;
    }
    if (entry.isDirectory()) {
      if (entry.name === ".openclaw-repair") {
        continue;
      }
      await walkDir(full, files, multimodal, shouldSkipPath);
      continue;
    }
    if (!entry.isFile()) {
      continue;
    }
    if (!isAllowedMemoryFilePath(full, multimodal)) {
      continue;
    }
    files.push(full);
  }
}
|
||||
|
||||
/**
 * Enumerate all memory files for a workspace: the canonical root memory
 * file, everything under `<workspaceDir>/memory`, and any configured extra
 * paths (files or directories).
 *
 * Symlinks are never followed; stat/read failures are treated as "absent"
 * and skipped. The final list is deduplicated by realpath (falling back to
 * the literal path when realpath fails) while preserving discovery order.
 */
export async function listMemoryFiles(
  workspaceDir: string,
  extraPaths?: string[],
  multimodal?: MemoryMultimodalSettings,
): Promise<string[]> {
  const result: string[] = [];
  const memoryDir = path.join(workspaceDir, "memory");

  // Excludes auxiliary root-memory files (delegated to the shared helper).
  const shouldSkipWorkspaceMemoryPath = (absPath: string): boolean =>
    shouldSkipRootMemoryAuxiliaryPath({ workspaceDir, absPath });

  // Adds a single markdown file if it is a regular (non-symlink) .md file.
  const addMarkdownFile = async (absPath: string) => {
    try {
      const stat = await fs.lstat(absPath);
      if (stat.isSymbolicLink() || !stat.isFile()) {
        return;
      }
      if (!absPath.endsWith(".md")) {
        return;
      }
      result.push(absPath);
    } catch {}
  };

  // 1) Canonical root memory file, when present.
  const memoryFile = await resolveCanonicalRootMemoryFile(workspaceDir);
  if (memoryFile) {
    await addMarkdownFile(memoryFile);
  }
  // 2) The workspace memory directory (only if a real, non-symlink dir).
  try {
    const dirStat = await fs.lstat(memoryDir);
    if (!dirStat.isSymbolicLink() && dirStat.isDirectory()) {
      await walkDir(memoryDir, result, multimodal, shouldSkipWorkspaceMemoryPath);
    }
  } catch {}

  // 3) Extra paths: directories are walked, files are added when allowed.
  const normalizedExtraPaths = normalizeExtraMemoryPaths(workspaceDir, extraPaths);
  if (normalizedExtraPaths.length > 0) {
    for (const inputPath of normalizedExtraPaths) {
      if (shouldSkipWorkspaceMemoryPath(inputPath)) {
        continue;
      }
      try {
        const stat = await fs.lstat(inputPath);
        if (stat.isSymbolicLink()) {
          continue;
        }
        if (stat.isDirectory()) {
          await walkDir(inputPath, result, multimodal, shouldSkipWorkspaceMemoryPath);
          continue;
        }
        if (stat.isFile() && isAllowedMemoryFilePath(inputPath, multimodal)) {
          result.push(inputPath);
        }
      } catch {}
    }
  }
  if (result.length <= 1) {
    return result;
  }
  // Dedupe by resolved path so the same file reached via different routes
  // (e.g. an extra path inside the workspace) is indexed once.
  const seen = new Set<string>();
  const deduped: string[] = [];
  for (const entry of result) {
    let key = entry;
    try {
      key = await fs.realpath(entry);
    } catch {}
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    deduped.push(entry);
  }
  return deduped;
}
|
||||
|
||||
/**
 * Build a MemoryFileEntry for one on-disk file, or null when the file
 * should not be indexed.
 *
 * Multimodal path: files classified as an allowed modality are size-capped,
 * read fully, mime-sniffed from the first 512 bytes (the detected mime must
 * match the classified modality), and hashed both over raw bytes (sha256
 * `dataHash`) and over a JSON descriptor (`hash`) so either content or
 * metadata changes invalidate the entry. Markdown path: the file text is
 * read as UTF-8 and hashed directly.
 *
 * Returns null for missing files (races with deletion), oversized
 * multimodal files, and mime mismatches; other fs errors propagate.
 */
export async function buildFileEntry(
  absPath: string,
  workspaceDir: string,
  multimodal?: MemoryMultimodalSettings,
): Promise<MemoryFileEntry | null> {
  let stat;
  try {
    stat = await fs.stat(absPath);
  } catch (err) {
    if (isFileMissingError(err)) {
      return null;
    }
    throw err;
  }
  // Workspace-relative with forward slashes, matching MemoryFileEntry.path.
  const normalizedPath = path.relative(workspaceDir, absPath).replace(/\\/g, "/");
  const multimodalSettings = multimodal ?? DISABLED_MULTIMODAL_SETTINGS;
  const modality = classifyMemoryMultimodalPath(absPath, multimodalSettings);
  if (modality) {
    if (stat.size > multimodalSettings.maxFileBytes) {
      return null;
    }
    let buffer: Buffer;
    try {
      buffer = await fs.readFile(absPath);
    } catch (err) {
      if (isFileMissingError(err)) {
        return null;
      }
      throw err;
    }
    // Sniff mime from the leading bytes; it must agree with the
    // extension-derived modality or the file is rejected.
    const mimeType = await detectMime({ buffer: buffer.subarray(0, 512), filePath: absPath });
    if (!mimeType || !mimeType.startsWith(`${modality}/`)) {
      return null;
    }
    const contentText = buildMemoryMultimodalLabel(modality, normalizedPath);
    const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
    // Hash over a descriptor so renames/mime changes also invalidate.
    const chunkHash = hashText(
      JSON.stringify({
        path: normalizedPath,
        contentText,
        mimeType,
        dataHash,
      }),
    );
    return {
      path: normalizedPath,
      absPath,
      mtimeMs: stat.mtimeMs,
      size: stat.size,
      hash: chunkHash,
      dataHash,
      kind: "multimodal",
      contentText,
      modality,
      mimeType,
    };
  }
  let content: string;
  try {
    content = await fs.readFile(absPath, "utf-8");
  } catch (err) {
    if (isFileMissingError(err)) {
      return null;
    }
    throw err;
  }
  const hash = hashText(content);
  return {
    path: normalizedPath,
    absPath,
    mtimeMs: stat.mtimeMs,
    size: stat.size,
    hash,
    kind: "markdown",
  };
}
|
||||
|
||||
/**
 * Re-read a multimodal file and assemble its structured embedding input
 * (text label part + inline base64 data part).
 *
 * Returns null when the entry is not multimodal, the file has gone missing,
 * or the on-disk content no longer matches the entry (size differs, or the
 * recomputed sha256 differs from `entry.dataHash`) — i.e. the file changed
 * between scanning and embedding. Other fs errors propagate.
 */
async function loadMultimodalEmbeddingInput(
  entry: Pick<
    MemoryFileEntry,
    "absPath" | "contentText" | "mimeType" | "kind" | "size" | "dataHash"
  >,
): Promise<EmbeddingInput | null> {
  if (entry.kind !== "multimodal" || !entry.contentText || !entry.mimeType) {
    return null;
  }
  let stat;
  try {
    stat = await fs.stat(entry.absPath);
  } catch (err) {
    if (isFileMissingError(err)) {
      return null;
    }
    throw err;
  }
  // Cheap size check before reading the whole file.
  if (stat.size !== entry.size) {
    return null;
  }
  let buffer: Buffer;
  try {
    buffer = await fs.readFile(entry.absPath);
  } catch (err) {
    if (isFileMissingError(err)) {
      return null;
    }
    throw err;
  }
  // Full content check: reject if bytes changed since the entry was built.
  const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
  if (entry.dataHash && entry.dataHash !== dataHash) {
    return null;
  }
  return {
    text: entry.contentText,
    parts: [
      { type: "text", text: entry.contentText },
      {
        type: "inline-data",
        mimeType: entry.mimeType,
        data: buffer.toString("base64"),
      },
    ],
  };
}
|
||||
|
||||
export async function buildMultimodalChunkForIndexing(
|
||||
entry: Pick<
|
||||
MemoryFileEntry,
|
||||
"absPath" | "contentText" | "mimeType" | "kind" | "hash" | "size" | "dataHash"
|
||||
>,
|
||||
): Promise<MultimodalMemoryChunk | null> {
|
||||
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
|
||||
if (!embeddingInput) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
chunk: {
|
||||
startLine: 1,
|
||||
endLine: 1,
|
||||
text: entry.contentText ?? embeddingInput.text,
|
||||
hash: entry.hash,
|
||||
embeddingInput,
|
||||
},
|
||||
structuredInputBytes: estimateStructuredEmbeddingInputBytes(embeddingInput),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Split markdown content into line-aligned chunks sized by an approximate
 * token budget.
 *
 * Budgets are expressed in characters (`tokens * CHARS_PER_TOKEN_ESTIMATE`,
 * floored at 32) using `estimateStringChars` as the weight, so CJK-heavy
 * text — where one char is roughly one token — is weighted more heavily.
 * Over-long lines are split into segments; consecutive chunks share up to
 * `overlap` tokens worth of trailing lines. Each chunk records its
 * 1-indexed start/end line, text, hash, and a text embedding input.
 */
export function chunkMarkdown(
  content: string,
  chunking: { tokens: number; overlap: number },
): MemoryChunk[] {
  const lines = content.split("\n");
  if (lines.length === 0) {
    return [];
  }
  const maxChars = Math.max(32, chunking.tokens * CHARS_PER_TOKEN_ESTIMATE);
  const overlapChars = Math.max(0, chunking.overlap * CHARS_PER_TOKEN_ESTIMATE);
  const chunks: MemoryChunk[] = [];

  // Accumulator for the chunk being built: the lines it holds and their
  // weighted character total.
  let current: Array<{ line: string; lineNo: number }> = [];
  let currentChars = 0;

  // Emit the accumulated lines as one chunk (no-op when empty).
  const flush = () => {
    if (current.length === 0) {
      return;
    }
    const firstEntry = current[0];
    const lastEntry = current[current.length - 1];
    if (!firstEntry || !lastEntry) {
      return;
    }
    const text = current.map((entry) => entry.line).join("\n");
    const startLine = firstEntry.lineNo;
    const endLine = lastEntry.lineNo;
    chunks.push({
      startLine,
      endLine,
      text,
      hash: hashText(text),
      embeddingInput: buildTextEmbeddingInput(text),
    });
  };

  // Seed the next chunk with the trailing lines of the previous one until
  // the overlap budget is covered (or clear it when overlap is disabled).
  const carryOverlap = () => {
    if (overlapChars <= 0 || current.length === 0) {
      current = [];
      currentChars = 0;
      return;
    }
    let acc = 0;
    const kept: Array<{ line: string; lineNo: number }> = [];
    for (let i = current.length - 1; i >= 0; i -= 1) {
      const entry = current[i];
      if (!entry) {
        continue;
      }
      acc += estimateStringChars(entry.line) + 1;
      kept.unshift(entry);
      if (acc >= overlapChars) {
        break;
      }
    }
    current = kept;
    currentChars = acc;
  };

  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i] ?? "";
    const lineNo = i + 1;
    const segments: string[] = [];
    if (line.length === 0) {
      segments.push("");
    } else {
      // First pass: slice at maxChars (preserves original behaviour for Latin).
      // Second pass: if a segment's *weighted* size still exceeds the budget
      // (happens for CJK-heavy text where 1 char ≈ 1 token), re-split it at
      // chunking.tokens so the chunk stays within the token budget.
      for (let start = 0; start < line.length; start += maxChars) {
        const coarse = line.slice(start, start + maxChars);
        if (estimateStringChars(coarse) > maxChars) {
          const fineStep = Math.max(1, chunking.tokens);
          for (let j = 0; j < coarse.length; ) {
            let end = Math.min(j + fineStep, coarse.length);
            // Avoid splitting inside a UTF-16 surrogate pair (CJK Extension B+).
            if (end < coarse.length) {
              const code = coarse.charCodeAt(end - 1);
              if (code >= 0xd800 && code <= 0xdbff) {
                end += 1; // include the low surrogate
              }
            }
            segments.push(coarse.slice(j, end));
            j = end; // advance cursor to the adjusted boundary
          }
        } else {
          segments.push(coarse);
        }
      }
    }
    // All segments of a split line keep the same original line number.
    for (const segment of segments) {
      const lineSize = estimateStringChars(segment) + 1;
      if (currentChars + lineSize > maxChars && current.length > 0) {
        flush();
        carryOverlap();
      }
      current.push({ line: segment, lineNo });
      currentChars += lineSize;
    }
  }
  flush();
  return chunks;
}
|
||||
|
||||
/**
|
||||
* Remap chunk startLine/endLine from content-relative positions to original
|
||||
* source file positions using a lineMap. Each entry in lineMap gives the
|
||||
* 1-indexed source line for the corresponding 0-indexed content line.
|
||||
*
|
||||
* This is used for session JSONL files where buildSessionEntry() flattens
|
||||
* messages into a plain-text string before chunking. Without remapping the
|
||||
* stored line numbers would reference positions in the flattened text rather
|
||||
* than the original JSONL file.
|
||||
*/
|
||||
export function remapChunkLines(chunks: MemoryChunk[], lineMap: number[] | undefined): void {
|
||||
if (!lineMap || lineMap.length === 0) {
|
||||
return;
|
||||
}
|
||||
for (const chunk of chunks) {
|
||||
// startLine/endLine are 1-indexed; lineMap is 0-indexed by content line
|
||||
chunk.startLine = lineMap[chunk.startLine - 1] ?? chunk.startLine;
|
||||
chunk.endLine = lineMap[chunk.endLine - 1] ?? chunk.endLine;
|
||||
}
|
||||
}
|
||||
|
||||
export function parseEmbedding(raw: string): number[] {
|
||||
try {
|
||||
const parsed = JSON.parse(raw) as number[];
|
||||
return Array.isArray(parsed) ? parsed : [];
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
export function cosineSimilarity(a: number[], b: number[]): number {
|
||||
if (a.length === 0 || b.length === 0) {
|
||||
return 0;
|
||||
}
|
||||
const len = Math.min(a.length, b.length);
|
||||
let dot = 0;
|
||||
let normA = 0;
|
||||
let normB = 0;
|
||||
for (let i = 0; i < len; i += 1) {
|
||||
const av = a[i] ?? 0;
|
||||
const bv = b[i] ?? 0;
|
||||
dot += av * bv;
|
||||
normA += av * av;
|
||||
normB += bv * bv;
|
||||
}
|
||||
if (normA === 0 || normB === 0) {
|
||||
return 0;
|
||||
}
|
||||
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
|
||||
}
|
||||
|
||||
export async function runWithConcurrency<T>(
|
||||
tasks: Array<() => Promise<T>>,
|
||||
limit: number,
|
||||
): Promise<T[]> {
|
||||
const { results, firstError, hasError } = await runTasksWithConcurrency({
|
||||
tasks,
|
||||
limit,
|
||||
errorMode: "stop",
|
||||
});
|
||||
if (hasError) {
|
||||
throw firstError;
|
||||
}
|
||||
return results;
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/internal.js";
|
||||
|
||||
@@ -1,103 +1 @@
|
||||
import type { DatabaseSync } from "node:sqlite";
|
||||
import { formatErrorMessage } from "../../infra/errors.js";
|
||||
|
||||
/**
 * Create (idempotently) the memory index schema: `meta`, `files`, `chunks`,
 * the optional embedding cache table, and the optional FTS5 virtual table.
 * Also migrates older databases by adding the `source` column and ensuring
 * indexes on `chunks`.
 *
 * FTS creation failures are captured rather than thrown (fts5 may be absent
 * from the SQLite build); the return value reports whether FTS is available
 * and, when not, the error message.
 */
export function ensureMemoryIndexSchema(params: {
  db: DatabaseSync;
  embeddingCacheTable: string;
  cacheEnabled: boolean;
  ftsTable: string;
  ftsEnabled: boolean;
  ftsTokenizer?: "unicode61" | "trigram";
}): { ftsAvailable: boolean; ftsError?: string } {
  // Key/value store for index-level metadata.
  params.db.exec(`
    CREATE TABLE IF NOT EXISTS meta (
      key TEXT PRIMARY KEY,
      value TEXT NOT NULL
    );
  `);
  // One row per indexed file; hash/mtime/size drive change detection.
  params.db.exec(`
    CREATE TABLE IF NOT EXISTS files (
      path TEXT PRIMARY KEY,
      source TEXT NOT NULL DEFAULT 'memory',
      hash TEXT NOT NULL,
      mtime INTEGER NOT NULL,
      size INTEGER NOT NULL
    );
  `);
  // One row per chunk; embedding is stored as text (JSON).
  params.db.exec(`
    CREATE TABLE IF NOT EXISTS chunks (
      id TEXT PRIMARY KEY,
      path TEXT NOT NULL,
      source TEXT NOT NULL DEFAULT 'memory',
      start_line INTEGER NOT NULL,
      end_line INTEGER NOT NULL,
      hash TEXT NOT NULL,
      model TEXT NOT NULL,
      text TEXT NOT NULL,
      embedding TEXT NOT NULL,
      updated_at INTEGER NOT NULL
    );
  `);
  if (params.cacheEnabled) {
    // Embedding cache keyed by (provider, model, provider_key, content hash).
    params.db.exec(`
      CREATE TABLE IF NOT EXISTS ${params.embeddingCacheTable} (
        provider TEXT NOT NULL,
        model TEXT NOT NULL,
        provider_key TEXT NOT NULL,
        hash TEXT NOT NULL,
        embedding TEXT NOT NULL,
        dims INTEGER,
        updated_at INTEGER NOT NULL,
        PRIMARY KEY (provider, model, provider_key, hash)
      );
    `);
    // updated_at index supports age-based cache eviction queries.
    params.db.exec(
      `CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${params.embeddingCacheTable}(updated_at);`,
    );
  }

  let ftsAvailable = false;
  let ftsError: string | undefined;
  if (params.ftsEnabled) {
    try {
      const tokenizer = params.ftsTokenizer ?? "unicode61";
      // trigram tokenizer enables substring matching (case-insensitive).
      const tokenizeClause = tokenizer === "trigram" ? `, tokenize='trigram case_sensitive 0'` : "";
      // Only `text` is indexed; the other columns are stored payload.
      params.db.exec(
        `CREATE VIRTUAL TABLE IF NOT EXISTS ${params.ftsTable} USING fts5(\n` +
          ` text,\n` +
          ` id UNINDEXED,\n` +
          ` path UNINDEXED,\n` +
          ` source UNINDEXED,\n` +
          ` model UNINDEXED,\n` +
          ` start_line UNINDEXED,\n` +
          ` end_line UNINDEXED\n` +
          `${tokenizeClause});`,
      );
      ftsAvailable = true;
    } catch (err) {
      // fts5 may be unavailable in this SQLite build; degrade gracefully.
      const message = formatErrorMessage(err);
      ftsAvailable = false;
      ftsError = message;
    }
  }

  // Migration for databases created before the `source` column existed.
  ensureColumn(params.db, "files", "source", "TEXT NOT NULL DEFAULT 'memory'");
  ensureColumn(params.db, "chunks", "source", "TEXT NOT NULL DEFAULT 'memory'");
  params.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`);
  params.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source);`);

  return { ftsAvailable, ...(ftsError ? { ftsError } : {}) };
}
|
||||
|
||||
function ensureColumn(
|
||||
db: DatabaseSync,
|
||||
table: "files" | "chunks",
|
||||
column: string,
|
||||
definition: string,
|
||||
): void {
|
||||
const rows = db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>;
|
||||
if (rows.some((row) => row.name === column)) {
|
||||
return;
|
||||
}
|
||||
db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`);
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/memory-schema.js";
|
||||
|
||||
@@ -4,22 +4,37 @@ import { fileURLToPath } from "node:url";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
const HOST_DIR = path.dirname(fileURLToPath(import.meta.url));
|
||||
const REPO_ROOT = path.resolve(HOST_DIR, "../../..");
|
||||
const PACKAGE_HOST_DIR = path.join(REPO_ROOT, "packages/memory-host-sdk/src/host");
|
||||
|
||||
const PACKAGE_COVERED_MIRRORS = [
|
||||
const PACKAGE_BRIDGE_FILES = [
|
||||
"backend-config.ts",
|
||||
"batch-error-utils.ts",
|
||||
"batch-output.ts",
|
||||
"batch-status.ts",
|
||||
"embedding-chunk-limits.ts",
|
||||
"embeddings-model-normalize.ts",
|
||||
"embedding-input-limits.ts",
|
||||
"embeddings-remote-provider.ts",
|
||||
"embeddings.ts",
|
||||
"internal.ts",
|
||||
"memory-schema.ts",
|
||||
"multimodal.ts",
|
||||
"qmd-process.ts",
|
||||
"qmd-scope.ts",
|
||||
"query-expansion.ts",
|
||||
"read-file-shared.ts",
|
||||
"read-file.ts",
|
||||
"session-files.ts",
|
||||
"types.ts",
|
||||
] as const;
|
||||
|
||||
describe("memory-host-sdk mirrored host modules", () => {
|
||||
it("keeps package-covered source mirrors byte-identical", () => {
|
||||
for (const fileName of PACKAGE_COVERED_MIRRORS) {
|
||||
const srcSource = fs.readFileSync(path.join(HOST_DIR, fileName), "utf8");
|
||||
const packageSource = fs.readFileSync(path.join(PACKAGE_HOST_DIR, fileName), "utf8");
|
||||
expect(srcSource, fileName).toBe(packageSource);
|
||||
describe("memory-host-sdk host package bridges", () => {
|
||||
it("keeps package-owned source bridges thin", () => {
|
||||
for (const fileName of PACKAGE_BRIDGE_FILES) {
|
||||
const source = fs.readFileSync(path.join(HOST_DIR, fileName), "utf8");
|
||||
expect(source, fileName).toBe(
|
||||
`export * from "../../../packages/memory-host-sdk/src/host/${fileName.replace(
|
||||
/\.ts$/u,
|
||||
".js",
|
||||
)}";\n`,
|
||||
);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,108 +1 @@
|
||||
import {
|
||||
lowercasePreservingWhitespace,
|
||||
normalizeLowercaseStringOrEmpty,
|
||||
} from "../../shared/string-coerce.js";
|
||||
|
||||
const MEMORY_MULTIMODAL_SPECS = {
|
||||
image: {
|
||||
labelPrefix: "Image file",
|
||||
extensions: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".heic", ".heif"],
|
||||
},
|
||||
audio: {
|
||||
labelPrefix: "Audio file",
|
||||
extensions: [".mp3", ".wav", ".ogg", ".opus", ".m4a", ".aac", ".flac"],
|
||||
},
|
||||
} as const;
|
||||
|
||||
export type MemoryMultimodalModality = keyof typeof MEMORY_MULTIMODAL_SPECS;
|
||||
export const MEMORY_MULTIMODAL_MODALITIES = Object.keys(
|
||||
MEMORY_MULTIMODAL_SPECS,
|
||||
) as MemoryMultimodalModality[];
|
||||
export type MemoryMultimodalSelection = MemoryMultimodalModality | "all";
|
||||
|
||||
export type MemoryMultimodalSettings = {
|
||||
enabled: boolean;
|
||||
modalities: MemoryMultimodalModality[];
|
||||
maxFileBytes: number;
|
||||
};
|
||||
|
||||
export const DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES = 10 * 1024 * 1024;
|
||||
|
||||
export function normalizeMemoryMultimodalModalities(
|
||||
raw: MemoryMultimodalSelection[] | undefined,
|
||||
): MemoryMultimodalModality[] {
|
||||
if (raw === undefined || raw.includes("all")) {
|
||||
return [...MEMORY_MULTIMODAL_MODALITIES];
|
||||
}
|
||||
const normalized = new Set<MemoryMultimodalModality>();
|
||||
for (const value of raw) {
|
||||
if (value === "image" || value === "audio") {
|
||||
normalized.add(value);
|
||||
}
|
||||
}
|
||||
return Array.from(normalized);
|
||||
}
|
||||
|
||||
export function normalizeMemoryMultimodalSettings(raw: {
|
||||
enabled?: boolean;
|
||||
modalities?: MemoryMultimodalSelection[];
|
||||
maxFileBytes?: number;
|
||||
}): MemoryMultimodalSettings {
|
||||
const enabled = raw.enabled === true;
|
||||
const maxFileBytes =
|
||||
typeof raw.maxFileBytes === "number" && Number.isFinite(raw.maxFileBytes)
|
||||
? Math.max(1, Math.floor(raw.maxFileBytes))
|
||||
: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES;
|
||||
return {
|
||||
enabled,
|
||||
modalities: enabled ? normalizeMemoryMultimodalModalities(raw.modalities) : [],
|
||||
maxFileBytes,
|
||||
};
|
||||
}
|
||||
|
||||
export function isMemoryMultimodalEnabled(settings: MemoryMultimodalSettings): boolean {
|
||||
return settings.enabled && settings.modalities.length > 0;
|
||||
}
|
||||
|
||||
export function getMemoryMultimodalExtensions(
|
||||
modality: MemoryMultimodalModality,
|
||||
): readonly string[] {
|
||||
return MEMORY_MULTIMODAL_SPECS[modality].extensions;
|
||||
}
|
||||
|
||||
export function buildMemoryMultimodalLabel(
|
||||
modality: MemoryMultimodalModality,
|
||||
normalizedPath: string,
|
||||
): string {
|
||||
return `${MEMORY_MULTIMODAL_SPECS[modality].labelPrefix}: ${normalizedPath}`;
|
||||
}
|
||||
|
||||
export function buildCaseInsensitiveExtensionGlob(extension: string): string {
|
||||
const normalized = normalizeLowercaseStringOrEmpty(extension).replace(/^\./, "");
|
||||
if (!normalized) {
|
||||
return "*";
|
||||
}
|
||||
const parts = Array.from(normalized, (char) => {
|
||||
const lower = lowercasePreservingWhitespace(char);
|
||||
return `[${lower}${char.toUpperCase()}]`;
|
||||
});
|
||||
return `*.${parts.join("")}`;
|
||||
}
|
||||
|
||||
export function classifyMemoryMultimodalPath(
|
||||
filePath: string,
|
||||
settings: MemoryMultimodalSettings,
|
||||
): MemoryMultimodalModality | null {
|
||||
if (!isMemoryMultimodalEnabled(settings)) {
|
||||
return null;
|
||||
}
|
||||
const lower = normalizeLowercaseStringOrEmpty(filePath);
|
||||
for (const modality of settings.modalities) {
|
||||
for (const extension of getMemoryMultimodalExtensions(modality)) {
|
||||
if (lower.endsWith(extension)) {
|
||||
return modality;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/multimodal.js";
|
||||
|
||||
@@ -1,184 +1 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import {
|
||||
materializeWindowsSpawnProgram,
|
||||
resolveWindowsSpawnProgram,
|
||||
} from "../../plugin-sdk/windows-spawn.js";
|
||||
|
||||
/** A fully-resolved spawn request ready to hand to `child_process.spawn`. */
export type CliSpawnInvocation = {
  command: string;
  argv: string[];
  // Windows may require shell execution for .cmd/.bat shims.
  shell?: boolean;
  windowsHide?: boolean;
};

/** Result of probing whether the `qmd` binary can be spawned at all. */
export type QmdBinaryAvailability = {
  available: boolean;
  // Populated only when `available` is false.
  error?: string;
};

/**
 * Resolves a CLI command into a concrete spawn invocation, applying the
 * Windows-specific program resolution (PATH lookup, extension handling)
 * from the plugin SDK. Shell fallback is disabled so arguments are never
 * subject to shell interpretation.
 */
export function resolveCliSpawnInvocation(params: {
  command: string;
  args: string[];
  env: NodeJS.ProcessEnv;
  packageName: string;
}): CliSpawnInvocation {
  const program = resolveWindowsSpawnProgram({
    command: params.command,
    platform: process.platform,
    env: params.env,
    execPath: process.execPath,
    packageName: params.packageName,
    allowShellFallback: false,
  });
  return materializeWindowsSpawnProgram(program, params.args);
}
|
||||
|
||||
/**
 * Probes whether `params.command` can be spawned, without running it to
 * completion: the child is killed as soon as the "spawn" event fires.
 * Never rejects — resolution failures, spawn errors, and timeouts all
 * resolve to `{ available: false, error }`.
 *
 * @param params.timeoutMs Max wait before declaring unavailable (default 2s).
 */
export async function checkQmdBinaryAvailability(params: {
  command: string;
  env: NodeJS.ProcessEnv;
  cwd?: string;
  timeoutMs?: number;
}): Promise<QmdBinaryAvailability> {
  let spawnInvocation: CliSpawnInvocation;
  try {
    spawnInvocation = resolveCliSpawnInvocation({
      command: params.command,
      args: [],
      env: params.env,
      packageName: "qmd",
    });
  } catch (err) {
    // Resolution can throw (e.g. command not found on Windows PATH).
    return { available: false, error: formatQmdAvailabilityError(err) };
  }

  return await new Promise((resolve) => {
    let settled = false;
    let didSpawn = false;
    // Single funnel for resolution: first outcome wins, later events no-op.
    // `timer` is referenced here before its declaration below; this is safe
    // because finish only runs from async event/timer callbacks, which fire
    // after the synchronous setup completes.
    const finish = (result: QmdBinaryAvailability) => {
      if (settled) {
        return;
      }
      settled = true;
      if (timer) {
        clearTimeout(timer);
      }
      resolve(result);
    };

    const child = spawn(spawnInvocation.command, spawnInvocation.argv, {
      env: params.env,
      cwd: params.cwd ?? process.cwd(),
      shell: spawnInvocation.shell,
      windowsHide: spawnInvocation.windowsHide,
      // Output is irrelevant for an availability probe.
      stdio: "ignore",
    });
    const timer = setTimeout(() => {
      child.kill("SIGKILL");
      finish({
        available: false,
        error: `spawn ${params.command} timed out after ${params.timeoutMs ?? 2_000}ms`,
      });
    }, params.timeoutMs ?? 2_000);

    child.once("error", (err) => {
      finish({ available: false, error: formatQmdAvailabilityError(err) });
    });
    child.once("spawn", () => {
      // Process started successfully — that is all we need to know.
      didSpawn = true;
      child.kill();
      finish({ available: true });
    });
    child.once("close", () => {
      // "close" without a prior "spawn" means the error path already handled
      // (or will handle) this child; don't report availability.
      if (!didSpawn) {
        return;
      }
      finish({ available: true });
    });
  });
}
|
||||
|
||||
/**
 * Runs a resolved CLI invocation to completion, capturing stdout/stderr with
 * a hard cap on retained characters.
 *
 * Rejects when: the command times out (SIGKILL is sent), spawning fails,
 * the captured output exceeds `maxOutputChars` (unless stdout is discarded),
 * or the process exits non-zero.
 *
 * @param params.commandSummary Human-readable label used in error messages.
 * @param params.discardStdout When true, stdout is dropped entirely and only
 *   stderr participates in the overflow check.
 */
export async function runCliCommand(params: {
  commandSummary: string;
  spawnInvocation: CliSpawnInvocation;
  env: NodeJS.ProcessEnv;
  cwd: string;
  timeoutMs?: number;
  maxOutputChars: number;
  discardStdout?: boolean;
}): Promise<{ stdout: string; stderr: string }> {
  return await new Promise((resolve, reject) => {
    const child = spawn(params.spawnInvocation.command, params.spawnInvocation.argv, {
      env: params.env,
      cwd: params.cwd,
      shell: params.spawnInvocation.shell,
      windowsHide: params.spawnInvocation.windowsHide,
    });
    let stdout = "";
    let stderr = "";
    let stdoutTruncated = false;
    let stderrTruncated = false;
    const discardStdout = params.discardStdout === true;
    const timer = params.timeoutMs
      ? setTimeout(() => {
          child.kill("SIGKILL");
          reject(new Error(`${params.commandSummary} timed out after ${params.timeoutMs}ms`));
        }, params.timeoutMs)
      : null;
    child.stdout.on("data", (data) => {
      if (discardStdout) {
        return;
      }
      // Keep at most the trailing maxOutputChars; remember that we clipped.
      const next = appendOutputWithCap(stdout, data.toString("utf8"), params.maxOutputChars);
      stdout = next.text;
      stdoutTruncated = stdoutTruncated || next.truncated;
    });
    child.stderr.on("data", (data) => {
      const next = appendOutputWithCap(stderr, data.toString("utf8"), params.maxOutputChars);
      stderr = next.text;
      stderrTruncated = stderrTruncated || next.truncated;
    });
    child.on("error", (err) => {
      if (timer) {
        clearTimeout(timer);
      }
      reject(err);
    });
    child.on("close", (code) => {
      if (timer) {
        clearTimeout(timer);
      }
      // Truncated output is treated as a failure: a clipped result could be
      // silently misleading to the caller.
      if (!discardStdout && (stdoutTruncated || stderrTruncated)) {
        reject(
          new Error(
            `${params.commandSummary} produced too much output (limit ${params.maxOutputChars} chars)`,
          ),
        );
        return;
      }
      if (code === 0) {
        resolve({ stdout, stderr });
      } else {
        reject(new Error(`${params.commandSummary} failed (code ${code}): ${stderr || stdout}`));
      }
    });
  });
}
|
||||
|
||||
function appendOutputWithCap(
|
||||
current: string,
|
||||
chunk: string,
|
||||
maxChars: number,
|
||||
): { text: string; truncated: boolean } {
|
||||
const appended = current + chunk;
|
||||
if (appended.length <= maxChars) {
|
||||
return { text: appended, truncated: false };
|
||||
}
|
||||
return { text: appended.slice(-maxChars), truncated: true };
|
||||
}
|
||||
|
||||
function formatQmdAvailabilityError(err: unknown): string {
|
||||
if (err instanceof Error && err.message) {
|
||||
return err.message;
|
||||
}
|
||||
return String(err);
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/qmd-process.js";
|
||||
|
||||
@@ -1,110 +1 @@
|
||||
import { parseAgentSessionKey } from "../../sessions/session-key-utils.js";
|
||||
import {
|
||||
normalizeLowercaseStringOrEmpty,
|
||||
normalizeOptionalLowercaseString,
|
||||
} from "../../shared/string-coerce.js";
|
||||
import type { ResolvedQmdConfig } from "./backend-config.js";
|
||||
|
||||
// Scope-relevant fields parsed out of a session key.
type ParsedQmdSessionScope = {
  channel?: string;
  chatType?: "channel" | "group" | "direct";
  normalizedKey?: string;
};

/**
 * Evaluates the qmd scope rules against a session key, first-matching-rule
 * wins. A rule matches only when every specified criterion (channel,
 * chatType, key prefixes) matches; an empty `match` matches everything.
 * No scope configured => allowed; no rule matched => `scope.default`
 * (treated as "allow" when unset).
 */
export function isQmdScopeAllowed(scope: ResolvedQmdConfig["scope"], sessionKey?: string): boolean {
  if (!scope) {
    return true;
  }
  const parsed = parseQmdSessionScope(sessionKey);
  const channel = parsed.channel;
  const chatType = parsed.chatType;
  const normalizedKey = parsed.normalizedKey ?? "";
  // Raw key (only lowercased) is matched separately from the normalized key.
  const rawKey = normalizeLowercaseStringOrEmpty(sessionKey);
  for (const rule of scope.rules ?? []) {
    if (!rule) {
      continue;
    }
    const match = rule.match ?? {};
    if (match.channel && match.channel !== channel) {
      continue;
    }
    if (match.chatType && match.chatType !== chatType) {
      continue;
    }
    const normalizedPrefix = normalizeOptionalLowercaseString(match.keyPrefix);
    const rawPrefix = normalizeOptionalLowercaseString(match.rawKeyPrefix);

    if (rawPrefix && !rawKey.startsWith(rawPrefix)) {
      continue;
    }
    if (normalizedPrefix) {
      // Backward compat: older configs used `keyPrefix: "agent:<id>:..."` to match raw keys.
      const isLegacyRaw = normalizedPrefix.startsWith("agent:");
      if (isLegacyRaw) {
        if (!rawKey.startsWith(normalizedPrefix)) {
          continue;
        }
      } else if (!normalizedKey.startsWith(normalizedPrefix)) {
        continue;
      }
    }
    // All criteria matched: this rule decides.
    return rule.action === "allow";
  }
  const fallback = scope.default ?? "allow";
  return fallback === "allow";
}
|
||||
|
||||
export function deriveQmdScopeChannel(key?: string): string | undefined {
|
||||
return parseQmdSessionScope(key).channel;
|
||||
}
|
||||
|
||||
export function deriveQmdScopeChatType(key?: string): "channel" | "group" | "direct" | undefined {
|
||||
return parseQmdSessionScope(key).chatType;
|
||||
}
|
||||
|
||||
/**
 * Heuristically parses channel and chat type out of a normalized session
 * key. When the second `:`-separated segment is a chat-type keyword
 * (group/channel/direct/dm) the first segment is taken as the channel;
 * "dm" falls through to "direct". Otherwise only the chat type is inferred
 * from `:group:` / `:channel:` substrings, defaulting to "direct".
 */
function parseQmdSessionScope(key?: string): ParsedQmdSessionScope {
  const normalized = normalizeQmdSessionKey(key);
  if (!normalized) {
    return {};
  }
  const parts = normalized.split(":").filter(Boolean);
  let chatType: ParsedQmdSessionScope["chatType"];
  if (
    parts.length >= 2 &&
    (parts[1] === "group" || parts[1] === "channel" || parts[1] === "direct" || parts[1] === "dm")
  ) {
    // `includes` (not just parts[1]) so keys like "slack:x:group:..." still
    // classify correctly; unmatched (incl. "dm") defaults to "direct" below.
    if (parts.includes("group")) {
      chatType = "group";
    } else if (parts.includes("channel")) {
      chatType = "channel";
    }
    return {
      normalizedKey: normalized,
      channel: normalizeOptionalLowercaseString(parts[0]),
      chatType: chatType ?? "direct",
    };
  }
  // No explicit chat-type segment: fall back to substring sniffing.
  if (normalized.includes(":group:")) {
    return { normalizedKey: normalized, chatType: "group" };
  }
  if (normalized.includes(":channel:")) {
    return { normalizedKey: normalized, chatType: "channel" };
  }
  return { normalizedKey: normalized, chatType: "direct" };
}
|
||||
|
||||
function normalizeQmdSessionKey(key?: string): string | undefined {
|
||||
if (!key) {
|
||||
return undefined;
|
||||
}
|
||||
const trimmed = key.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
const parsed = parseAgentSessionKey(trimmed);
|
||||
const normalized = normalizeLowercaseStringOrEmpty(parsed?.rest ?? trimmed);
|
||||
if (normalized.startsWith("subagent:")) {
|
||||
return undefined;
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/qmd-scope.js";
|
||||
|
||||
@@ -1,830 +1 @@
|
||||
/**
|
||||
* Query expansion for FTS-only search mode.
|
||||
*
|
||||
* When no embedding provider is available, we fall back to FTS (full-text search).
|
||||
* FTS works best with specific keywords, but users often ask conversational queries
|
||||
* like "that thing we discussed yesterday" or "之前讨论的那个方案".
|
||||
*
|
||||
* This module extracts meaningful keywords from such queries to improve FTS results.
|
||||
*/
|
||||
|
||||
import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js";
|
||||
|
||||
// Common stop words that don't add search value
|
||||
const STOP_WORDS_EN = new Set([
|
||||
// Articles and determiners
|
||||
"a",
|
||||
"an",
|
||||
"the",
|
||||
"this",
|
||||
"that",
|
||||
"these",
|
||||
"those",
|
||||
// Pronouns
|
||||
"i",
|
||||
"me",
|
||||
"my",
|
||||
"we",
|
||||
"our",
|
||||
"you",
|
||||
"your",
|
||||
"he",
|
||||
"she",
|
||||
"it",
|
||||
"they",
|
||||
"them",
|
||||
// Common verbs
|
||||
"is",
|
||||
"are",
|
||||
"was",
|
||||
"were",
|
||||
"be",
|
||||
"been",
|
||||
"being",
|
||||
"have",
|
||||
"has",
|
||||
"had",
|
||||
"do",
|
||||
"does",
|
||||
"did",
|
||||
"will",
|
||||
"would",
|
||||
"could",
|
||||
"should",
|
||||
"can",
|
||||
"may",
|
||||
"might",
|
||||
// Prepositions
|
||||
"in",
|
||||
"on",
|
||||
"at",
|
||||
"to",
|
||||
"for",
|
||||
"of",
|
||||
"with",
|
||||
"by",
|
||||
"from",
|
||||
"about",
|
||||
"into",
|
||||
"through",
|
||||
"during",
|
||||
"before",
|
||||
"after",
|
||||
"above",
|
||||
"below",
|
||||
"between",
|
||||
"under",
|
||||
"over",
|
||||
// Conjunctions
|
||||
"and",
|
||||
"or",
|
||||
"but",
|
||||
"if",
|
||||
"then",
|
||||
"because",
|
||||
"as",
|
||||
"while",
|
||||
"when",
|
||||
"where",
|
||||
"what",
|
||||
"which",
|
||||
"who",
|
||||
"how",
|
||||
"why",
|
||||
// Time references (vague, not useful for FTS)
|
||||
"yesterday",
|
||||
"today",
|
||||
"tomorrow",
|
||||
"earlier",
|
||||
"later",
|
||||
"recently",
|
||||
"before",
|
||||
"ago",
|
||||
"just",
|
||||
"now",
|
||||
// Vague references
|
||||
"thing",
|
||||
"things",
|
||||
"stuff",
|
||||
"something",
|
||||
"anything",
|
||||
"everything",
|
||||
"nothing",
|
||||
// Question words
|
||||
"please",
|
||||
"help",
|
||||
"find",
|
||||
"show",
|
||||
"get",
|
||||
"tell",
|
||||
"give",
|
||||
]);
|
||||
|
||||
const STOP_WORDS_ES = new Set([
|
||||
// Articles and determiners
|
||||
"el",
|
||||
"la",
|
||||
"los",
|
||||
"las",
|
||||
"un",
|
||||
"una",
|
||||
"unos",
|
||||
"unas",
|
||||
"este",
|
||||
"esta",
|
||||
"ese",
|
||||
"esa",
|
||||
// Pronouns
|
||||
"yo",
|
||||
"me",
|
||||
"mi",
|
||||
"nosotros",
|
||||
"nosotras",
|
||||
"tu",
|
||||
"tus",
|
||||
"usted",
|
||||
"ustedes",
|
||||
"ellos",
|
||||
"ellas",
|
||||
// Prepositions and conjunctions
|
||||
"de",
|
||||
"del",
|
||||
"a",
|
||||
"en",
|
||||
"con",
|
||||
"por",
|
||||
"para",
|
||||
"sobre",
|
||||
"entre",
|
||||
"y",
|
||||
"o",
|
||||
"pero",
|
||||
"si",
|
||||
"porque",
|
||||
"como",
|
||||
// Common verbs / auxiliaries
|
||||
"es",
|
||||
"son",
|
||||
"fue",
|
||||
"fueron",
|
||||
"ser",
|
||||
"estar",
|
||||
"haber",
|
||||
"tener",
|
||||
"hacer",
|
||||
// Time references (vague)
|
||||
"ayer",
|
||||
"hoy",
|
||||
"mañana",
|
||||
"antes",
|
||||
"despues",
|
||||
"después",
|
||||
"ahora",
|
||||
"recientemente",
|
||||
// Question/request words
|
||||
"que",
|
||||
"qué",
|
||||
"cómo",
|
||||
"cuando",
|
||||
"cuándo",
|
||||
"donde",
|
||||
"dónde",
|
||||
"porqué",
|
||||
"favor",
|
||||
"ayuda",
|
||||
]);
|
||||
|
||||
const STOP_WORDS_PT = new Set([
|
||||
// Articles and determiners
|
||||
"o",
|
||||
"a",
|
||||
"os",
|
||||
"as",
|
||||
"um",
|
||||
"uma",
|
||||
"uns",
|
||||
"umas",
|
||||
"este",
|
||||
"esta",
|
||||
"esse",
|
||||
"essa",
|
||||
// Pronouns
|
||||
"eu",
|
||||
"me",
|
||||
"meu",
|
||||
"minha",
|
||||
"nos",
|
||||
"nós",
|
||||
"você",
|
||||
"vocês",
|
||||
"ele",
|
||||
"ela",
|
||||
"eles",
|
||||
"elas",
|
||||
// Prepositions and conjunctions
|
||||
"de",
|
||||
"do",
|
||||
"da",
|
||||
"em",
|
||||
"com",
|
||||
"por",
|
||||
"para",
|
||||
"sobre",
|
||||
"entre",
|
||||
"e",
|
||||
"ou",
|
||||
"mas",
|
||||
"se",
|
||||
"porque",
|
||||
"como",
|
||||
// Common verbs / auxiliaries
|
||||
"é",
|
||||
"são",
|
||||
"foi",
|
||||
"foram",
|
||||
"ser",
|
||||
"estar",
|
||||
"ter",
|
||||
"fazer",
|
||||
// Time references (vague)
|
||||
"ontem",
|
||||
"hoje",
|
||||
"amanhã",
|
||||
"antes",
|
||||
"depois",
|
||||
"agora",
|
||||
"recentemente",
|
||||
// Question/request words
|
||||
"que",
|
||||
"quê",
|
||||
"quando",
|
||||
"onde",
|
||||
"porquê",
|
||||
"favor",
|
||||
"ajuda",
|
||||
]);
|
||||
|
||||
const STOP_WORDS_AR = new Set([
|
||||
// Articles and connectors
|
||||
"ال",
|
||||
"و",
|
||||
"أو",
|
||||
"لكن",
|
||||
"ثم",
|
||||
"بل",
|
||||
// Pronouns / references
|
||||
"أنا",
|
||||
"نحن",
|
||||
"هو",
|
||||
"هي",
|
||||
"هم",
|
||||
"هذا",
|
||||
"هذه",
|
||||
"ذلك",
|
||||
"تلك",
|
||||
"هنا",
|
||||
"هناك",
|
||||
// Common prepositions
|
||||
"من",
|
||||
"إلى",
|
||||
"الى",
|
||||
"في",
|
||||
"على",
|
||||
"عن",
|
||||
"مع",
|
||||
"بين",
|
||||
"ل",
|
||||
"ب",
|
||||
"ك",
|
||||
// Common auxiliaries / vague verbs
|
||||
"كان",
|
||||
"كانت",
|
||||
"يكون",
|
||||
"تكون",
|
||||
"صار",
|
||||
"أصبح",
|
||||
"يمكن",
|
||||
"ممكن",
|
||||
// Time references (vague)
|
||||
"بالأمس",
|
||||
"امس",
|
||||
"اليوم",
|
||||
"غدا",
|
||||
"الآن",
|
||||
"قبل",
|
||||
"بعد",
|
||||
"مؤخرا",
|
||||
// Question/request words
|
||||
"لماذا",
|
||||
"كيف",
|
||||
"ماذا",
|
||||
"متى",
|
||||
"أين",
|
||||
"هل",
|
||||
"من فضلك",
|
||||
"فضلا",
|
||||
"ساعد",
|
||||
]);
|
||||
|
||||
const STOP_WORDS_KO = new Set([
|
||||
// Particles (조사)
|
||||
"은",
|
||||
"는",
|
||||
"이",
|
||||
"가",
|
||||
"을",
|
||||
"를",
|
||||
"의",
|
||||
"에",
|
||||
"에서",
|
||||
"로",
|
||||
"으로",
|
||||
"와",
|
||||
"과",
|
||||
"도",
|
||||
"만",
|
||||
"까지",
|
||||
"부터",
|
||||
"한테",
|
||||
"에게",
|
||||
"께",
|
||||
"처럼",
|
||||
"같이",
|
||||
"보다",
|
||||
"마다",
|
||||
"밖에",
|
||||
"대로",
|
||||
// Pronouns (대명사)
|
||||
"나",
|
||||
"나는",
|
||||
"내가",
|
||||
"나를",
|
||||
"너",
|
||||
"우리",
|
||||
"저",
|
||||
"저희",
|
||||
"그",
|
||||
"그녀",
|
||||
"그들",
|
||||
"이것",
|
||||
"저것",
|
||||
"그것",
|
||||
"여기",
|
||||
"저기",
|
||||
"거기",
|
||||
// Common verbs / auxiliaries (일반 동사/보조 동사)
|
||||
"있다",
|
||||
"없다",
|
||||
"하다",
|
||||
"되다",
|
||||
"이다",
|
||||
"아니다",
|
||||
"보다",
|
||||
"주다",
|
||||
"오다",
|
||||
"가다",
|
||||
// Nouns (의존 명사 / vague)
|
||||
"것",
|
||||
"거",
|
||||
"등",
|
||||
"수",
|
||||
"때",
|
||||
"곳",
|
||||
"중",
|
||||
"분",
|
||||
// Adverbs
|
||||
"잘",
|
||||
"더",
|
||||
"또",
|
||||
"매우",
|
||||
"정말",
|
||||
"아주",
|
||||
"많이",
|
||||
"너무",
|
||||
"좀",
|
||||
// Conjunctions
|
||||
"그리고",
|
||||
"하지만",
|
||||
"그래서",
|
||||
"그런데",
|
||||
"그러나",
|
||||
"또는",
|
||||
"그러면",
|
||||
// Question words
|
||||
"왜",
|
||||
"어떻게",
|
||||
"뭐",
|
||||
"언제",
|
||||
"어디",
|
||||
"누구",
|
||||
"무엇",
|
||||
"어떤",
|
||||
// Time (vague)
|
||||
"어제",
|
||||
"오늘",
|
||||
"내일",
|
||||
"최근",
|
||||
"지금",
|
||||
"아까",
|
||||
"나중",
|
||||
"전에",
|
||||
// Request words
|
||||
"제발",
|
||||
"부탁",
|
||||
]);
|
||||
|
||||
// Common Korean trailing particles to strip from words for tokenization
|
||||
// Sorted by descending length so longest-match-first is guaranteed.
|
||||
const KO_TRAILING_PARTICLES = [
|
||||
"에서",
|
||||
"으로",
|
||||
"에게",
|
||||
"한테",
|
||||
"처럼",
|
||||
"같이",
|
||||
"보다",
|
||||
"까지",
|
||||
"부터",
|
||||
"마다",
|
||||
"밖에",
|
||||
"대로",
|
||||
"은",
|
||||
"는",
|
||||
"이",
|
||||
"가",
|
||||
"을",
|
||||
"를",
|
||||
"의",
|
||||
"에",
|
||||
"로",
|
||||
"와",
|
||||
"과",
|
||||
"도",
|
||||
"만",
|
||||
].toSorted((a, b) => b.length - a.length);
|
||||
|
||||
function stripKoreanTrailingParticle(token: string): string | null {
|
||||
for (const particle of KO_TRAILING_PARTICLES) {
|
||||
if (token.length > particle.length && token.endsWith(particle)) {
|
||||
return token.slice(0, -particle.length);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function isUsefulKoreanStem(stem: string): boolean {
|
||||
// Prevent bogus one-syllable stems from words like "논의" -> "논".
|
||||
if (/[\uac00-\ud7af]/.test(stem)) {
|
||||
return stem.length >= 2;
|
||||
}
|
||||
// Keep stripped ASCII stems for mixed tokens like "API를" -> "api".
|
||||
return /^[a-z0-9_]+$/i.test(stem);
|
||||
}
|
||||
|
||||
const STOP_WORDS_JA = new Set([
|
||||
// Pronouns and references
|
||||
"これ",
|
||||
"それ",
|
||||
"あれ",
|
||||
"この",
|
||||
"その",
|
||||
"あの",
|
||||
"ここ",
|
||||
"そこ",
|
||||
"あそこ",
|
||||
// Common auxiliaries / vague verbs
|
||||
"する",
|
||||
"した",
|
||||
"して",
|
||||
"です",
|
||||
"ます",
|
||||
"いる",
|
||||
"ある",
|
||||
"なる",
|
||||
"できる",
|
||||
// Particles / connectors
|
||||
"の",
|
||||
"こと",
|
||||
"もの",
|
||||
"ため",
|
||||
"そして",
|
||||
"しかし",
|
||||
"また",
|
||||
"でも",
|
||||
"から",
|
||||
"まで",
|
||||
"より",
|
||||
"だけ",
|
||||
// Question words
|
||||
"なぜ",
|
||||
"どう",
|
||||
"何",
|
||||
"いつ",
|
||||
"どこ",
|
||||
"誰",
|
||||
"どれ",
|
||||
// Time (vague)
|
||||
"昨日",
|
||||
"今日",
|
||||
"明日",
|
||||
"最近",
|
||||
"今",
|
||||
"さっき",
|
||||
"前",
|
||||
"後",
|
||||
]);
|
||||
|
||||
const STOP_WORDS_ZH = new Set([
|
||||
// Pronouns
|
||||
"我",
|
||||
"我们",
|
||||
"你",
|
||||
"你们",
|
||||
"他",
|
||||
"她",
|
||||
"它",
|
||||
"他们",
|
||||
"这",
|
||||
"那",
|
||||
"这个",
|
||||
"那个",
|
||||
"这些",
|
||||
"那些",
|
||||
// Auxiliary words
|
||||
"的",
|
||||
"了",
|
||||
"着",
|
||||
"过",
|
||||
"得",
|
||||
"地",
|
||||
"吗",
|
||||
"呢",
|
||||
"吧",
|
||||
"啊",
|
||||
"呀",
|
||||
"嘛",
|
||||
"啦",
|
||||
// Verbs (common, vague)
|
||||
"是",
|
||||
"有",
|
||||
"在",
|
||||
"被",
|
||||
"把",
|
||||
"给",
|
||||
"让",
|
||||
"用",
|
||||
"到",
|
||||
"去",
|
||||
"来",
|
||||
"做",
|
||||
"说",
|
||||
"看",
|
||||
"找",
|
||||
"想",
|
||||
"要",
|
||||
"能",
|
||||
"会",
|
||||
"可以",
|
||||
// Prepositions and conjunctions
|
||||
"和",
|
||||
"与",
|
||||
"或",
|
||||
"但",
|
||||
"但是",
|
||||
"因为",
|
||||
"所以",
|
||||
"如果",
|
||||
"虽然",
|
||||
"而",
|
||||
"也",
|
||||
"都",
|
||||
"就",
|
||||
"还",
|
||||
"又",
|
||||
"再",
|
||||
"才",
|
||||
"只",
|
||||
// Time (vague)
|
||||
"之前",
|
||||
"以前",
|
||||
"之后",
|
||||
"以后",
|
||||
"刚才",
|
||||
"现在",
|
||||
"昨天",
|
||||
"今天",
|
||||
"明天",
|
||||
"最近",
|
||||
// Vague references
|
||||
"东西",
|
||||
"事情",
|
||||
"事",
|
||||
"什么",
|
||||
"哪个",
|
||||
"哪些",
|
||||
"怎么",
|
||||
"为什么",
|
||||
"多少",
|
||||
// Question/request words
|
||||
"请",
|
||||
"帮",
|
||||
"帮忙",
|
||||
"告诉",
|
||||
]);
|
||||
|
||||
export function isQueryStopWordToken(token: string): boolean {
|
||||
return (
|
||||
STOP_WORDS_EN.has(token) ||
|
||||
STOP_WORDS_ES.has(token) ||
|
||||
STOP_WORDS_PT.has(token) ||
|
||||
STOP_WORDS_AR.has(token) ||
|
||||
STOP_WORDS_ZH.has(token) ||
|
||||
STOP_WORDS_KO.has(token) ||
|
||||
STOP_WORDS_JA.has(token)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a token looks like a meaningful keyword.
|
||||
* Returns false for short tokens, numbers-only, etc.
|
||||
*/
|
||||
function isValidKeyword(token: string): boolean {
|
||||
if (!token || token.length === 0) {
|
||||
return false;
|
||||
}
|
||||
// Skip very short English words (likely stop words or fragments)
|
||||
if (/^[a-zA-Z]+$/.test(token) && token.length < 3) {
|
||||
return false;
|
||||
}
|
||||
// Skip pure numbers (not useful for semantic search)
|
||||
if (/^\d+$/.test(token)) {
|
||||
return false;
|
||||
}
|
||||
// Skip tokens that are all punctuation
|
||||
if (/^[\p{P}\p{S}]+$/u.test(token)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Simple tokenizer that handles English, Chinese, Korean, and Japanese text.
 * For Chinese, we do character-based splitting since we don't have a proper segmenter.
 * For English, we split on whitespace and punctuation.
 *
 * In trigram mode, contiguous CJK runs are kept whole (SQLite trigram FTS
 * needs >= 3 chars per term); otherwise Chinese falls back to
 * unigrams + bigrams for phrase matching.
 */
function tokenize(text: string, opts?: { ftsTokenizer?: "unicode61" | "trigram" }): string[] {
  const useTrigram = opts?.ftsTokenizer === "trigram";
  const tokens: string[] = [];
  const normalized = normalizeLowercaseStringOrEmpty(text);

  // Split into segments (English words, Chinese character sequences, etc.)
  const segments = normalized.split(/[\s\p{P}]+/u).filter(Boolean);

  for (const segment of segments) {
    // Japanese text often mixes scripts (kanji/kana/ASCII) without spaces.
    // Extract script-specific chunks so technical terms like "API" / "バグ" are retained.
    if (/[\u3040-\u30ff]/.test(segment)) {
      // ASCII words, katakana runs, kanji runs, or hiragana runs of 2+.
      const jpParts =
        segment.match(/[a-z0-9_]+|[\u30a0-\u30ffー]+|[\u4e00-\u9fff]+|[\u3040-\u309f]{2,}/g) ?? [];
      for (const part of jpParts) {
        if (/^[\u4e00-\u9fff]+$/.test(part)) {
          tokens.push(part);
          // Kanji runs additionally yield bigrams (unless trigram mode).
          if (!useTrigram) {
            for (let i = 0; i < part.length - 1; i++) {
              tokens.push(part[i] + part[i + 1]);
            }
          }
        } else {
          tokens.push(part);
        }
      }
    } else if (/[\u4e00-\u9fff]/.test(segment)) {
      // Check if segment contains CJK characters (Chinese)
      const chars = Array.from(segment).filter((c) => /[\u4e00-\u9fff]/.test(c));
      if (useTrigram) {
        // In trigram mode, push the whole contiguous CJK block (mirroring the
        // Japanese kanji path). SQLite's trigram FTS requires at least 3 characters
        // per query term — individual characters silently return no results.
        const block = chars.join("");
        if (block.length > 0) {
          tokens.push(block);
        }
      } else {
        // Default mode: unigrams + bigrams for phrase matching
        tokens.push(...chars);
        for (let i = 0; i < chars.length - 1; i++) {
          tokens.push(chars[i] + chars[i + 1]);
        }
      }
    } else if (/[\uac00-\ud7af\u3131-\u3163]/.test(segment)) {
      // For Korean (Hangul syllables and jamo), keep the word as-is unless it is
      // effectively a stop word once trailing particles are removed.
      const stem = stripKoreanTrailingParticle(segment);
      const stemIsStopWord = stem !== null && STOP_WORDS_KO.has(stem);
      if (!STOP_WORDS_KO.has(segment) && !stemIsStopWord) {
        tokens.push(segment);
      }
      // Also emit particle-stripped stems when they are useful keywords.
      if (stem && !STOP_WORDS_KO.has(stem) && isUsefulKoreanStem(stem)) {
        tokens.push(stem);
      }
    } else {
      // For non-CJK, keep as single token
      tokens.push(segment);
    }
  }

  return tokens;
}
|
||||
|
||||
/**
|
||||
* Extract keywords from a conversational query for FTS search.
|
||||
*
|
||||
* Examples:
|
||||
* - "that thing we discussed about the API" → ["discussed", "API"]
|
||||
* - "之前讨论的那个方案" → ["讨论", "方案"]
|
||||
* - "what was the solution for the bug" → ["solution", "bug"]
|
||||
*/
|
||||
export function extractKeywords(
|
||||
query: string,
|
||||
opts?: { ftsTokenizer?: "unicode61" | "trigram" },
|
||||
): string[] {
|
||||
const tokens = tokenize(query, opts);
|
||||
const keywords: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
for (const token of tokens) {
|
||||
// Skip stop words
|
||||
if (isQueryStopWordToken(token)) {
|
||||
continue;
|
||||
}
|
||||
// Skip invalid keywords
|
||||
if (!isValidKeyword(token)) {
|
||||
continue;
|
||||
}
|
||||
// Skip duplicates
|
||||
if (seen.has(token)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(token);
|
||||
keywords.push(token);
|
||||
}
|
||||
|
||||
return keywords;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expand a query for FTS search.
|
||||
* Returns both the original query and extracted keywords for OR-matching.
|
||||
*
|
||||
* @param query - User's original query
|
||||
* @returns Object with original query and extracted keywords
|
||||
*/
|
||||
export function expandQueryForFts(
|
||||
query: string,
|
||||
opts?: { ftsTokenizer?: "unicode61" | "trigram" },
|
||||
): {
|
||||
original: string;
|
||||
keywords: string[];
|
||||
expanded: string;
|
||||
} {
|
||||
const original = query.trim();
|
||||
const keywords = extractKeywords(original, opts);
|
||||
|
||||
// Build expanded query: original terms OR extracted keywords
|
||||
// This ensures both exact matches and keyword matches are found
|
||||
const expanded = keywords.length > 0 ? `${original} OR ${keywords.join(" OR ")}` : original;
|
||||
|
||||
return { original, keywords, expanded };
|
||||
}
|
||||
|
||||
/**
 * Type for an optional LLM-based query expander.
 *
 * Given the raw user query, resolves to a list of semantically expanded
 * keywords. Can be provided to enhance keyword extraction with semantic
 * understanding; callers fall back to local extraction when it is absent,
 * throws, or returns an empty list (see `expandQueryWithLlm`).
 */
export type LlmQueryExpander = (query: string) => Promise<string[]>;
|
||||
|
||||
/**
|
||||
* Expand query with optional LLM assistance.
|
||||
* Falls back to local extraction if LLM is unavailable or fails.
|
||||
*/
|
||||
export async function expandQueryWithLlm(
|
||||
query: string,
|
||||
llmExpander?: LlmQueryExpander,
|
||||
opts?: { ftsTokenizer?: "unicode61" | "trigram" },
|
||||
): Promise<string[]> {
|
||||
// If LLM expander is provided, try it first
|
||||
if (llmExpander) {
|
||||
try {
|
||||
const llmKeywords = await llmExpander(query);
|
||||
if (llmKeywords.length > 0) {
|
||||
return llmKeywords;
|
||||
}
|
||||
} catch {
|
||||
// LLM failed, fall back to local extraction
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to local keyword extraction
|
||||
return extractKeywords(query, opts);
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/query-expansion.js";
|
||||
|
||||
@@ -1,107 +1 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { resolveAgentContextLimits, resolveAgentWorkspaceDir } from "../../agents/agent-scope.js";
|
||||
import { resolveMemorySearchConfig } from "../../agents/memory-search.js";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import { isFileMissingError, statRegularFile } from "./fs-utils.js";
|
||||
import { isMemoryPath, normalizeExtraMemoryPaths } from "./internal.js";
|
||||
import {
|
||||
buildMemoryReadResult,
|
||||
DEFAULT_MEMORY_READ_LINES,
|
||||
type MemoryReadResult,
|
||||
} from "./read-file-shared.js";
|
||||
|
||||
/**
 * Read a memory markdown file, enforcing that the requested path is either a
 * memory path inside the workspace or covered by an explicitly configured
 * extra path.
 *
 * Security notes (order matters):
 * - Relative inputs resolve against the workspace; absolute inputs are taken
 *   as-is after normalization.
 * - Symlinked extra paths are skipped outright (lstat + isSymbolicLink), so a
 *   link cannot widen the allowed set.
 * - Only `.md` files are readable, even when the path is otherwise allowed.
 * - All rejection branches throw the same "path required" message, so callers
 *   cannot distinguish "outside allowlist" from "missing argument".
 *   NOTE(review): presumably deliberate to avoid leaking path information —
 *   confirm before changing.
 *
 * @returns Empty text (not an error) when the file simply does not exist.
 */
export async function readMemoryFile(params: {
  workspaceDir: string;
  extraPaths?: string[];
  relPath: string;
  from?: number;
  lines?: number;
  defaultLines?: number;
  maxChars?: number;
}): Promise<MemoryReadResult> {
  const rawPath = params.relPath.trim();
  if (!rawPath) {
    throw new Error("path required");
  }
  // Resolve to an absolute, normalized path before any containment checks.
  const absPath = path.isAbsolute(rawPath)
    ? path.resolve(rawPath)
    : path.resolve(params.workspaceDir, rawPath);
  // Forward slashes keep the rel path comparable across platforms.
  const relPath = path.relative(params.workspaceDir, absPath).replace(/\\/g, "/");
  // Inside the workspace iff the relative form does not escape upward and is
  // not itself absolute (cross-drive on Windows yields an absolute relPath).
  const inWorkspace = relPath.length > 0 && !relPath.startsWith("..") && !path.isAbsolute(relPath);
  const allowedWorkspace = inWorkspace && isMemoryPath(relPath);
  let allowedAdditional = false;
  if (!allowedWorkspace && (params.extraPaths?.length ?? 0) > 0) {
    const additionalPaths = normalizeExtraMemoryPaths(params.workspaceDir, params.extraPaths);
    for (const additionalPath of additionalPaths) {
      try {
        const stat = await fs.lstat(additionalPath);
        if (stat.isSymbolicLink()) {
          // Never follow symlinked allowlist entries.
          continue;
        }
        if (stat.isDirectory()) {
          // Directory entry: allow the directory itself or anything strictly
          // beneath it (the separator suffix prevents /foo matching /foobar).
          if (absPath === additionalPath || absPath.startsWith(`${additionalPath}${path.sep}`)) {
            allowedAdditional = true;
            break;
          }
          continue;
        }
        // File entry: must be an exact match and a markdown file.
        if (stat.isFile() && absPath === additionalPath && absPath.endsWith(".md")) {
          allowedAdditional = true;
          break;
        }
      } catch {}
    }
  }
  if (!allowedWorkspace && !allowedAdditional) {
    throw new Error("path required");
  }
  // Even allowed locations only expose markdown content.
  if (!absPath.endsWith(".md")) {
    throw new Error("path required");
  }
  const statResult = await statRegularFile(absPath);
  if (statResult.missing) {
    return { text: "", path: relPath };
  }
  let content: string;
  try {
    content = await fs.readFile(absPath, "utf-8");
  } catch (err) {
    // Race: file removed between stat and read — treat like missing.
    if (isFileMissingError(err)) {
      return { text: "", path: relPath };
    }
    throw err;
  }
  return buildMemoryReadResult({
    content,
    relPath,
    from: params.from,
    lines: params.lines,
    defaultLines: params.defaultLines ?? DEFAULT_MEMORY_READ_LINES,
    maxChars: params.maxChars,
    // Only workspace files can be re-read via the generic read tool, so only
    // suggest that fallback for them.
    suggestReadFallback: allowedWorkspace,
  });
}
|
||||
|
||||
export async function readAgentMemoryFile(params: {
|
||||
cfg: OpenClawConfig;
|
||||
agentId: string;
|
||||
relPath: string;
|
||||
from?: number;
|
||||
lines?: number;
|
||||
}): Promise<MemoryReadResult> {
|
||||
const settings = resolveMemorySearchConfig(params.cfg, params.agentId);
|
||||
if (!settings) {
|
||||
throw new Error("memory search disabled");
|
||||
}
|
||||
const contextLimits = resolveAgentContextLimits(params.cfg, params.agentId);
|
||||
return await readMemoryFile({
|
||||
workspaceDir: resolveAgentWorkspaceDir(params.cfg, params.agentId),
|
||||
extraPaths: settings.extraPaths,
|
||||
relPath: params.relPath,
|
||||
from: params.from,
|
||||
lines: params.lines,
|
||||
defaultLines: contextLimits?.memoryGetDefaultLines,
|
||||
maxChars: contextLimits?.memoryGetMaxChars,
|
||||
});
|
||||
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/read-file.js";
|
||||
|
||||
@@ -1,565 +1 @@
|
||||
import fsSync from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { stripInternalRuntimeContext } from "../../agents/internal-runtime-context.js";
|
||||
import { isHeartbeatUserMessage } from "../../auto-reply/heartbeat-filter.js";
|
||||
import { HEARTBEAT_PROMPT } from "../../auto-reply/heartbeat.js";
|
||||
import { stripInboundMetadata } from "../../auto-reply/reply/strip-inbound-meta.js";
|
||||
import { HEARTBEAT_TOKEN, isSilentReplyPayloadText } from "../../auto-reply/tokens.js";
|
||||
import {
|
||||
isCompactionCheckpointTranscriptFileName,
|
||||
isSessionArchiveArtifactName,
|
||||
isUsageCountedSessionTranscriptFileName,
|
||||
} from "../../config/sessions/artifacts.js";
|
||||
import { resolveSessionTranscriptsDirForAgent } from "../../config/sessions/paths.js";
|
||||
import { isExecCompletionEvent } from "../../infra/heartbeat-events-filter.js";
|
||||
import { redactSensitiveText } from "../../logging/redact.js";
|
||||
import { hasInterSessionUserProvenance } from "../../sessions/input-provenance.js";
|
||||
import { isCronRunSessionKey } from "../../sessions/session-key-utils.js";
|
||||
import { hashText } from "./hash.js";
|
||||
|
||||
// Run-id / session-key prefix that marks internal dreaming-narrative runs;
// transcripts from such runs are excluded from the export corpus.
const DREAMING_NARRATIVE_RUN_PREFIX = "dreaming-narrative-";
// Keep the historical one-line-per-message export shape for normal turns, but
// wrap pathological long messages so downstream indexers never ingest a single
// toxic line. Wrapped continuation lines still map back to the same JSONL line.
// This limit applies to content only; the role label adds up to 11 chars.
const SESSION_EXPORT_CONTENT_WRAP_CHARS = 800;
// Matches generated cron prompts such as "[cron:daily] ..." at the start of a
// user message (see isGeneratedCronPromptMessage).
const DIRECT_CRON_PROMPT_RE = /^\[cron:[^\]]+\]\s*/;
|
||||
|
||||
/**
 * One exported session transcript, flattened to plain text for indexing.
 * `content` holds "Role: text" lines; the parallel arrays map each content
 * line back to its JSONL origin and timestamp.
 */
export type SessionFileEntry = {
  // Index-facing relative path, e.g. "sessions/<file>.jsonl".
  path: string;
  // Absolute path of the transcript on disk.
  absPath: string;
  mtimeMs: number;
  size: number;
  // Content hash used for change detection (covers content + line maps).
  hash: string;
  content: string;
  /** Maps each content line (0-indexed) to its 1-indexed JSONL source line. */
  lineMap: number[];
  /** Maps each content line (0-indexed) to epoch ms; 0 means unknown timestamp. */
  messageTimestampsMs: number[];
  /** True when this transcript belongs to an internal dreaming narrative run. */
  generatedByDreamingNarrative?: boolean;
  /** True when this transcript belongs to an isolated cron run session. */
  generatedByCronRun?: boolean;
};
|
||||
|
||||
/** Options accepted by `buildSessionEntry`. */
export type BuildSessionEntryOptions = {
  /** Optional preclassification from a caller-managed dreaming transcript lookup. */
  generatedByDreamingNarrative?: boolean;
  /** Optional preclassification from a caller-managed cron transcript lookup. */
  generatedByCronRun?: boolean;
};

/**
 * Normalized transcript paths grouped by how their session was generated.
 * Paths are normalized via `normalizeSessionTranscriptPathForComparison`.
 */
export type SessionTranscriptClassification = {
  dreamingNarrativeTranscriptPaths: ReadonlySet<string>;
  cronRunTranscriptPaths: ReadonlySet<string>;
};

// Loose shape of one value in sessions.json; fields are validated at use
// sites since the store is read from disk as untrusted JSON.
type SessionTranscriptStoreEntry = {
  sessionFile?: unknown;
  sessionId?: unknown;
};
|
||||
|
||||
function shouldSkipTranscriptFileForDreaming(absPath: string): boolean {
|
||||
const fileName = path.basename(absPath);
|
||||
return (
|
||||
isSessionArchiveArtifactName(fileName) || isCompactionCheckpointTranscriptFileName(fileName)
|
||||
);
|
||||
}
|
||||
|
||||
function isDreamingNarrativeBootstrapRecord(record: unknown): boolean {
|
||||
if (!record || typeof record !== "object" || Array.isArray(record)) {
|
||||
return false;
|
||||
}
|
||||
const candidate = record as {
|
||||
type?: unknown;
|
||||
customType?: unknown;
|
||||
data?: unknown;
|
||||
};
|
||||
if (
|
||||
candidate.type !== "custom" ||
|
||||
candidate.customType !== "openclaw:bootstrap-context:full" ||
|
||||
!candidate.data ||
|
||||
typeof candidate.data !== "object" ||
|
||||
Array.isArray(candidate.data)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
const runId = (candidate.data as { runId?: unknown }).runId;
|
||||
return typeof runId === "string" && runId.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
|
||||
}
|
||||
|
||||
function hasDreamingNarrativeRunId(value: unknown): boolean {
|
||||
return typeof value === "string" && value.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
|
||||
}
|
||||
|
||||
function isDreamingNarrativeGeneratedRecord(record: unknown): boolean {
|
||||
if (isDreamingNarrativeBootstrapRecord(record)) {
|
||||
return true;
|
||||
}
|
||||
if (!record || typeof record !== "object" || Array.isArray(record)) {
|
||||
return false;
|
||||
}
|
||||
const candidate = record as {
|
||||
runId?: unknown;
|
||||
sessionKey?: unknown;
|
||||
data?: unknown;
|
||||
};
|
||||
if (
|
||||
hasDreamingNarrativeRunId(candidate.runId) ||
|
||||
hasDreamingNarrativeRunId(candidate.sessionKey)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
if (!candidate.data || typeof candidate.data !== "object" || Array.isArray(candidate.data)) {
|
||||
return false;
|
||||
}
|
||||
const nested = candidate.data as {
|
||||
runId?: unknown;
|
||||
sessionKey?: unknown;
|
||||
};
|
||||
return hasDreamingNarrativeRunId(nested.runId) || hasDreamingNarrativeRunId(nested.sessionKey);
|
||||
}
|
||||
|
||||
function isDreamingNarrativeSessionStoreKey(sessionKey: string): boolean {
|
||||
const trimmed = sessionKey.trim();
|
||||
if (!trimmed) {
|
||||
return false;
|
||||
}
|
||||
const firstSeparator = trimmed.indexOf(":");
|
||||
if (firstSeparator < 0) {
|
||||
return trimmed.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
|
||||
}
|
||||
const secondSeparator = trimmed.indexOf(":", firstSeparator + 1);
|
||||
const sessionSegment = secondSeparator < 0 ? trimmed : trimmed.slice(secondSeparator + 1);
|
||||
return sessionSegment.startsWith(DREAMING_NARRATIVE_RUN_PREFIX);
|
||||
}
|
||||
|
||||
function normalizeComparablePath(pathname: string): string {
|
||||
const resolved = path.resolve(pathname);
|
||||
return process.platform === "win32" ? resolved.toLowerCase() : resolved;
|
||||
}
|
||||
|
||||
/**
 * Public alias of `normalizeComparablePath` for callers that compare
 * transcript paths against the sets returned by the classification loaders.
 */
export function normalizeSessionTranscriptPathForComparison(pathname: string): string {
  return normalizeComparablePath(pathname);
}
|
||||
|
||||
function resolveSessionStoreTranscriptPath(
|
||||
sessionsDir: string,
|
||||
entry: { sessionFile?: unknown; sessionId?: unknown } | undefined,
|
||||
): string | null {
|
||||
if (typeof entry?.sessionFile === "string" && entry.sessionFile.trim().length > 0) {
|
||||
const sessionFile = entry.sessionFile.trim();
|
||||
const resolved = path.isAbsolute(sessionFile)
|
||||
? sessionFile
|
||||
: path.resolve(sessionsDir, sessionFile);
|
||||
return normalizeComparablePath(resolved);
|
||||
}
|
||||
if (typeof entry?.sessionId === "string" && entry.sessionId.trim().length > 0) {
|
||||
return normalizeComparablePath(path.join(sessionsDir, `${entry.sessionId.trim()}.jsonl`));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Convenience accessor: only the dreaming-narrative transcript paths from
 * the full classification of a sessions directory.
 */
export function loadDreamingNarrativeTranscriptPathSetForSessionsDir(
  sessionsDir: string,
): ReadonlySet<string> {
  return loadSessionTranscriptClassificationForSessionsDir(sessionsDir)
    .dreamingNarrativeTranscriptPaths;
}
|
||||
|
||||
export function loadSessionTranscriptClassificationForSessionsDir(
|
||||
sessionsDir: string,
|
||||
): SessionTranscriptClassification {
|
||||
const storePath = path.join(sessionsDir, "sessions.json");
|
||||
const store = readSessionTranscriptClassificationStore(storePath);
|
||||
const dreamingTranscriptPaths = new Set<string>();
|
||||
const cronRunTranscriptPaths = new Set<string>();
|
||||
for (const [sessionKey, entry] of Object.entries(store)) {
|
||||
const transcriptPath = resolveSessionStoreTranscriptPath(sessionsDir, entry);
|
||||
if (!transcriptPath) {
|
||||
continue;
|
||||
}
|
||||
if (isDreamingNarrativeSessionStoreKey(sessionKey)) {
|
||||
dreamingTranscriptPaths.add(transcriptPath);
|
||||
}
|
||||
if (isCronRunSessionKey(sessionKey)) {
|
||||
cronRunTranscriptPaths.add(transcriptPath);
|
||||
}
|
||||
}
|
||||
return {
|
||||
dreamingNarrativeTranscriptPaths: dreamingTranscriptPaths,
|
||||
cronRunTranscriptPaths,
|
||||
};
|
||||
}
|
||||
|
||||
function readSessionTranscriptClassificationStore(
|
||||
storePath: string,
|
||||
): Record<string, SessionTranscriptStoreEntry> {
|
||||
try {
|
||||
const parsed = JSON.parse(fsSync.readFileSync(storePath, "utf-8")) as unknown;
|
||||
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
|
||||
return {};
|
||||
}
|
||||
return parsed as Record<string, SessionTranscriptStoreEntry>;
|
||||
} catch {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convenience accessor: only the dreaming-narrative transcript paths for an
 * agent's sessions directory.
 */
export function loadDreamingNarrativeTranscriptPathSetForAgent(
  agentId: string,
): ReadonlySet<string> {
  return loadSessionTranscriptClassificationForAgent(agentId).dreamingNarrativeTranscriptPaths;
}

/**
 * Classify transcripts for an agent by resolving its transcripts directory
 * and delegating to the sessions-dir loader.
 */
export function loadSessionTranscriptClassificationForAgent(
  agentId: string,
): SessionTranscriptClassification {
  return loadSessionTranscriptClassificationForSessionsDir(
    resolveSessionTranscriptsDirForAgent(agentId),
  );
}
|
||||
|
||||
function classifySessionTranscriptFromSessionStore(absPath: string): {
|
||||
generatedByDreamingNarrative: boolean;
|
||||
generatedByCronRun: boolean;
|
||||
} {
|
||||
const sessionsDir = path.dirname(absPath);
|
||||
const normalizedAbsPath = normalizeComparablePath(absPath);
|
||||
const classification = loadSessionTranscriptClassificationForSessionsDir(sessionsDir);
|
||||
return {
|
||||
generatedByDreamingNarrative:
|
||||
classification.dreamingNarrativeTranscriptPaths.has(normalizedAbsPath),
|
||||
generatedByCronRun: classification.cronRunTranscriptPaths.has(normalizedAbsPath),
|
||||
};
|
||||
}
|
||||
|
||||
export async function listSessionFilesForAgent(agentId: string): Promise<string[]> {
|
||||
const dir = resolveSessionTranscriptsDirForAgent(agentId);
|
||||
try {
|
||||
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||
return entries
|
||||
.filter((entry) => entry.isFile())
|
||||
.map((entry) => entry.name)
|
||||
.filter((name) => isUsageCountedSessionTranscriptFileName(name))
|
||||
.map((name) => path.join(dir, name));
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
export function sessionPathForFile(absPath: string): string {
|
||||
return path.join("sessions", path.basename(absPath)).replace(/\\/g, "/");
|
||||
}
|
||||
|
||||
// Debug-log a transcript read failure. The logger is imported lazily —
// presumably to avoid an import cycle with the logging module; TODO confirm.
async function logSessionFileReadFailure(absPath: string, err: unknown): Promise<void> {
  const { createSubsystemLogger } = await import("../../logging/subsystem.js");
  createSubsystemLogger("memory").debug(`Failed reading session file ${absPath}: ${String(err)}`);
}
|
||||
|
||||
function normalizeSessionText(value: string): string {
|
||||
return value
|
||||
.replace(/\s*\n+\s*/g, " ")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim();
|
||||
}
|
||||
|
||||
function collectRawSessionText(content: unknown): string | null {
|
||||
if (typeof content === "string") {
|
||||
return content;
|
||||
}
|
||||
if (!Array.isArray(content)) {
|
||||
return null;
|
||||
}
|
||||
const parts: string[] = [];
|
||||
for (const block of content) {
|
||||
if (!block || typeof block !== "object") {
|
||||
continue;
|
||||
}
|
||||
const record = block as { type?: unknown; text?: unknown };
|
||||
if (record.type === "text" && typeof record.text === "string") {
|
||||
parts.push(record.text);
|
||||
}
|
||||
}
|
||||
return parts.length > 0 ? parts.join("\n") : null;
|
||||
}
|
||||
|
||||
function isHighSurrogate(code: number): boolean {
|
||||
return code >= 0xd800 && code <= 0xdbff;
|
||||
}
|
||||
|
||||
function isLowSurrogate(code: number): boolean {
|
||||
return code >= 0xdc00 && code <= 0xdfff;
|
||||
}
|
||||
|
||||
/**
 * Split one (already single-line) message into segments of at most
 * `maxChars` characters, preferring to break on spaces and never splitting a
 * surrogate pair. Short inputs come back as a single-element array; blank
 * input yields [].
 */
function splitLongSessionLine(
  text: string,
  maxChars: number = SESSION_EXPORT_CONTENT_WRAP_CHARS,
): string[] {
  const normalized = text.trim();
  if (!normalized) {
    return [];
  }
  if (normalized.length <= maxChars) {
    return [normalized];
  }

  const segments: string[] = [];
  let cursor = 0;
  while (cursor < normalized.length) {
    const remaining = normalized.length - cursor;
    if (remaining <= maxChars) {
      // Tail fits in one segment; emit and stop.
      segments.push(normalized.slice(cursor).trim());
      break;
    }

    // Prefer the last space at or before the hard limit; otherwise cut at
    // the limit itself.
    const limit = cursor + maxChars;
    let splitAt = limit;
    for (let index = limit; index > cursor; index -= 1) {
      if (normalized[index] === " ") {
        splitAt = index;
        break;
      }
    }
    // Back off one code unit so a surrogate pair is never split in half.
    if (
      splitAt < normalized.length &&
      splitAt > cursor &&
      isHighSurrogate(normalized.charCodeAt(splitAt - 1)) &&
      isLowSurrogate(normalized.charCodeAt(splitAt))
    ) {
      splitAt -= 1;
    }
    segments.push(normalized.slice(cursor, splitAt).trim());
    cursor = splitAt;
    // Skip the spaces we broke on so the next segment starts at content.
    while (cursor < normalized.length && normalized[cursor] === " ") {
      cursor += 1;
    }
  }

  // Trimming above can produce empty segments (e.g. all-space windows);
  // drop them.
  return segments.filter(Boolean);
}
|
||||
|
||||
function renderSessionExportLines(label: string, text: string): string[] {
|
||||
return splitLongSessionLine(text).map((segment) => `${label}: ${segment}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip OpenClaw-injected inbound metadata envelopes from a raw text block.
|
||||
*
|
||||
* User-role messages arriving from external channels (Telegram, Discord,
|
||||
* Slack, …) are stored with a multi-line prefix containing Conversation info,
|
||||
* Sender info, and other AI-facing metadata blocks. These envelopes must be
|
||||
* removed BEFORE normalization, because `stripInboundMetadata` relies on
|
||||
* newline structure and fenced `json` code fences to locate sentinels; once
|
||||
* `normalizeSessionText` collapses newlines into spaces, stripping is
|
||||
* impossible.
|
||||
*
|
||||
* See: https://github.com/openclaw/openclaw/issues/63921
|
||||
*/
|
||||
function stripInboundMetadataForUserRole(text: string, role: "user" | "assistant"): string {
|
||||
if (role !== "user") {
|
||||
return text;
|
||||
}
|
||||
return stripInboundMetadata(text);
|
||||
}
|
||||
|
||||
// Matches generated system-wrapper prefixes like "System: [exec] …" or
// "System (untrusted): [event] …" at the start of a user-role message.
const GENERATED_SYSTEM_MESSAGE_RE = /^System(?: \(untrusted\))?: \[[^\]]+\]\s*/;
|
||||
|
||||
function isGeneratedSystemWrapperMessage(text: string, role: "user" | "assistant"): boolean {
|
||||
if (role !== "user") {
|
||||
return false;
|
||||
}
|
||||
return GENERATED_SYSTEM_MESSAGE_RE.test(text);
|
||||
}
|
||||
|
||||
function isGeneratedCronPromptMessage(text: string, role: "user" | "assistant"): boolean {
|
||||
if (role !== "user") {
|
||||
return false;
|
||||
}
|
||||
return DIRECT_CRON_PROMPT_RE.test(text);
|
||||
}
|
||||
|
||||
// True for user-role messages that are the generated heartbeat prompt, as
// decided by the shared heartbeat filter against HEARTBEAT_PROMPT.
function isGeneratedHeartbeatPromptMessage(text: string, role: "user" | "assistant"): boolean {
  return role === "user" && isHeartbeatUserMessage({ role, content: text }, HEARTBEAT_PROMPT);
}
|
||||
|
||||
/**
 * Normalize one message's text and decide whether it belongs in the export.
 *
 * Returns the normalized single-line text, or null when the message is
 * machine-generated noise (system wrappers, cron/heartbeat prompts, silent
 * replies, heartbeat acks, exec-completion events) or normalizes to empty.
 *
 * Order matters: inbound metadata is stripped BEFORE normalization because
 * the stripper depends on newline structure (see
 * stripInboundMetadataForUserRole), and all role-sensitive checks run on the
 * normalized text.
 */
function sanitizeSessionText(text: string, role: "user" | "assistant"): string | null {
  const strippedInbound = stripInboundMetadataForUserRole(text, role);
  const strippedInternal = stripInternalRuntimeContext(strippedInbound);
  const normalized = normalizeSessionText(strippedInternal);
  if (!normalized) {
    return null;
  }
  if (isGeneratedSystemWrapperMessage(normalized, role)) {
    return null;
  }
  if (isGeneratedCronPromptMessage(normalized, role)) {
    return null;
  }
  if (isGeneratedHeartbeatPromptMessage(normalized, role)) {
    return null;
  }
  if (isSilentReplyPayloadText(normalized)) {
    return null;
  }
  // Assistant-side machinery acks: HEARTBEAT_OK is the canonical "all clear,
  // nothing to do" reply to a heartbeat tick. Drop on the assistant side
  // directly so we do not have to rely on cross-message coupling with the
  // preceding user message (which a real user could spoof).
  if (role === "assistant" && normalized === HEARTBEAT_TOKEN) {
    return null;
  }
  // Exec-completion events may arrive wrapped in a system envelope; strip it
  // for the check only — the returned text stays fully normalized.
  const withoutSystemEnvelope = normalized.replace(GENERATED_SYSTEM_MESSAGE_RE, "").trim();
  if (isExecCompletionEvent(withoutSystemEnvelope)) {
    return null;
  }
  return normalized;
}
|
||||
|
||||
export function extractSessionText(
|
||||
content: unknown,
|
||||
role: "user" | "assistant" = "assistant",
|
||||
): string | null {
|
||||
const rawText = collectRawSessionText(content);
|
||||
if (rawText === null) {
|
||||
return null;
|
||||
}
|
||||
return sanitizeSessionText(rawText, role);
|
||||
}
|
||||
|
||||
function parseSessionTimestampMs(
|
||||
record: { timestamp?: unknown },
|
||||
message: { timestamp?: unknown },
|
||||
): number {
|
||||
const candidates = [message.timestamp, record.timestamp];
|
||||
for (const value of candidates) {
|
||||
if (typeof value === "number" && Number.isFinite(value)) {
|
||||
const ms = value > 0 && value < 1e11 ? value * 1000 : value;
|
||||
if (Number.isFinite(ms) && ms > 0) {
|
||||
return ms;
|
||||
}
|
||||
}
|
||||
if (typeof value === "string") {
|
||||
const parsed = Date.parse(value);
|
||||
if (Number.isFinite(parsed) && parsed > 0) {
|
||||
return parsed;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Build the exportable `SessionFileEntry` for one JSONL transcript.
 *
 * Parses the transcript line by line, keeps sanitized user/assistant text,
 * and records per-line JSONL origins and timestamps. Dreaming-narrative and
 * cron-run transcripts keep their metadata/classification but export empty
 * content. Returns null when the file cannot be read at all.
 */
export async function buildSessionEntry(
  absPath: string,
  opts: BuildSessionEntryOptions = {},
): Promise<SessionFileEntry | null> {
  try {
    const stat = await fs.stat(absPath);
    // Archive/compaction artifacts export as empty entries (stable hash of
    // "\n\n") so indexers see them but ingest nothing.
    if (shouldSkipTranscriptFileForDreaming(absPath)) {
      return {
        path: sessionPathForFile(absPath),
        absPath,
        mtimeMs: stat.mtimeMs,
        size: stat.size,
        hash: hashText("\n\n"),
        content: "",
        lineMap: [],
        messageTimestampsMs: [],
      };
    }
    const raw = await fs.readFile(absPath, "utf-8");
    const lines = raw.split("\n");
    const collected: string[] = [];
    const lineMap: number[] = [];
    const messageTimestampsMs: number[] = [];
    // Only hit the sessions.json store when the caller did not preclassify
    // both flags.
    const sessionStoreClassification =
      opts.generatedByDreamingNarrative === undefined || opts.generatedByCronRun === undefined
        ? classifySessionTranscriptFromSessionStore(absPath)
        : null;
    // Mutable: may flip to true mid-scan when a dreaming record is seen.
    let generatedByDreamingNarrative =
      opts.generatedByDreamingNarrative ??
      sessionStoreClassification?.generatedByDreamingNarrative ??
      false;
    const generatedByCronRun =
      opts.generatedByCronRun ?? sessionStoreClassification?.generatedByCronRun ?? false;
    for (let jsonlIdx = 0; jsonlIdx < lines.length; jsonlIdx++) {
      const line = lines[jsonlIdx];
      if (!line.trim()) {
        continue;
      }
      let record: unknown;
      try {
        record = JSON.parse(line);
      } catch {
        // Malformed JSONL lines are skipped, not fatal.
        continue;
      }
      // Any record can reveal the dreaming classification, not just messages.
      if (!generatedByDreamingNarrative && isDreamingNarrativeGeneratedRecord(record)) {
        generatedByDreamingNarrative = true;
      }
      if (
        !record ||
        typeof record !== "object" ||
        (record as { type?: unknown }).type !== "message"
      ) {
        continue;
      }
      const message = (record as { message?: unknown }).message as
        | { role?: unknown; content?: unknown; provenance?: unknown }
        | undefined;
      if (!message || typeof message.role !== "string") {
        continue;
      }
      if (message.role !== "user" && message.role !== "assistant") {
        continue;
      }
      // Messages relayed from other sessions are not this session's content.
      if (message.role === "user" && hasInterSessionUserProvenance(message)) {
        continue;
      }
      const rawText = collectRawSessionText(message.content);
      if (rawText === null) {
        continue;
      }
      const text = sanitizeSessionText(rawText, message.role);
      if (!text) {
        // Assistant-side machinery (silent replies, system wrappers) is already
        // dropped by sanitizeSessionText. We deliberately do NOT use the prior
        // user message's pattern-match to drop the next assistant message:
        // user-typed text can match those same patterns (`[cron:...]`,
        // `System (untrusted): ...`) and a cross-message drop would let users
        // exfiltrate real assistant replies from the dreaming corpus by
        // prefixing their own prompt. See PR #70737 review (aisle-research-bot).
        continue;
      }
      // Classified transcripts still run the scan above (to finish flipping
      // the dreaming flag) but contribute no exported content.
      if (generatedByDreamingNarrative || generatedByCronRun) {
        continue;
      }
      const safe = redactSensitiveText(text, { mode: "tools" });
      const label = message.role === "user" ? "User" : "Assistant";
      const renderedLines = renderSessionExportLines(label, safe);
      const timestampMs = parseSessionTimestampMs(
        record as { timestamp?: unknown },
        message as { timestamp?: unknown },
      );
      collected.push(...renderedLines);
      // Every wrapped continuation line maps back to the same 1-indexed JSONL
      // line and timestamp.
      lineMap.push(...renderedLines.map(() => jsonlIdx + 1));
      messageTimestampsMs.push(...renderedLines.map(() => timestampMs));
    }
    const content = collected.join("\n");
    return {
      path: sessionPathForFile(absPath),
      absPath,
      mtimeMs: stat.mtimeMs,
      size: stat.size,
      // Hash covers content plus both maps so a mapping-only change dirties
      // the entry.
      hash: hashText(content + "\n" + lineMap.join(",") + "\n" + messageTimestampsMs.join(",")),
      content,
      lineMap,
      messageTimestampsMs,
      ...(generatedByDreamingNarrative ? { generatedByDreamingNarrative: true } : {}),
      ...(generatedByCronRun ? { generatedByCronRun: true } : {}),
    };
  } catch (err) {
    // Read/stat failures are logged (fire-and-forget) and reported as null.
    void logSessionFileReadFailure(absPath, err);
    return null;
  }
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/session-files.js";
|
||||
|
||||
@@ -1,107 +1 @@
|
||||
/** Where an indexed chunk came from: memory files or session transcripts. */
export type MemorySource = "memory" | "sessions";

/** One search hit, addressable as path plus a 1-indexed line range. */
export type MemorySearchResult = {
  path: string;
  startLine: number;
  endLine: number;
  // Combined ranking score; the per-signal components below are optional.
  score: number;
  vectorScore?: number;
  textScore?: number;
  snippet: string;
  source: MemorySource;
  citation?: string;
};

/** Outcome of probing whether embeddings are currently usable. */
export type MemoryEmbeddingProbeResult = {
  ok: boolean;
  error?: string;
  // Whether a real probe ran, and whether this result was served from cache.
  checked?: boolean;
  cached?: boolean;
  checkedAtMs?: number;
  cacheExpiresAtMs?: number;
};

/** Progress callback payload for long-running sync operations. */
export type MemorySyncProgressUpdate = {
  completed: number;
  total: number;
  label?: string;
};

/** Debug info about which search backend/mode actually handled a query. */
export type MemorySearchRuntimeDebug = {
  backend: "builtin" | "qmd";
  configuredMode?: string;
  effectiveMode?: string;
  fallback?: string;
};
|
||||
|
||||
/** Result of reading a memory file, possibly windowed by from/lines. */
export type MemoryReadResult = {
  text: string;
  path: string;
  truncated?: boolean;
  from?: number;
  lines?: number;
  // Line to continue from when the read was truncated.
  nextFrom?: number;
};

/** Snapshot of a memory provider's configuration and index health. */
export type MemoryProviderStatus = {
  backend: "builtin" | "qmd";
  provider: string;
  model?: string;
  requestedProvider?: string;
  // Index size totals, plus per-source breakdown below.
  files?: number;
  chunks?: number;
  dirty?: boolean;
  workspaceDir?: string;
  dbPath?: string;
  extraPaths?: string[];
  sources?: MemorySource[];
  sourceCounts?: Array<{ source: MemorySource; files: number; chunks: number }>;
  cache?: { enabled: boolean; entries?: number; maxEntries?: number };
  // Full-text search availability (e.g. SQLite FTS).
  fts?: { enabled: boolean; available: boolean; error?: string };
  fallback?: { from: string; reason?: string };
  // Vector-search extension state.
  vector?: {
    enabled: boolean;
    available?: boolean;
    extensionPath?: string;
    loadError?: string;
    dims?: number;
  };
  // Batched-embedding pipeline configuration and last failure info.
  batch?: {
    enabled: boolean;
    failures: number;
    limit: number;
    wait: boolean;
    concurrency: number;
    pollIntervalMs: number;
    timeoutMs: number;
    lastError?: string;
    lastProvider?: string;
  };
  // Provider-specific extras not covered by the fields above.
  custom?: Record<string, unknown>;
};
|
||||
|
||||
/**
 * Contract implemented by memory search backends (builtin or qmd).
 * Optional members are capabilities a backend may not support.
 */
export interface MemorySearchManager {
  /** Search indexed memory/session content; options tune scope and ranking. */
  search(
    query: string,
    opts?: {
      maxResults?: number;
      minScore?: number;
      sessionKey?: string;
      qmdSearchModeOverride?: "query" | "search" | "vsearch";
      onDebug?: (debug: MemorySearchRuntimeDebug) => void;
      sources?: MemorySource[];
    },
  ): Promise<MemorySearchResult[]>;
  /** Read a memory file (optionally a line window) through the backend. */
  readFile(params: { relPath: string; from?: number; lines?: number }): Promise<MemoryReadResult>;
  /** Current provider/index status snapshot. */
  status(): MemoryProviderStatus;
  /** Optional re-index; progress is reported via the callback when given. */
  sync?(params?: {
    reason?: string;
    force?: boolean;
    sessionFiles?: string[];
    progress?: (update: MemorySyncProgressUpdate) => void;
  }): Promise<void>;
  /** Cached embedding probe result, or null when no probe has run/cached. */
  getCachedEmbeddingAvailability?(): MemoryEmbeddingProbeResult | null;
  /** Actively probe whether embeddings are usable right now. */
  probeEmbeddingAvailability(): Promise<MemoryEmbeddingProbeResult>;
  /** Actively probe whether vector search is usable right now. */
  probeVectorAvailability(): Promise<boolean>;
  /** Optional resource teardown. */
  close?(): Promise<void>;
}
|
||||
export * from "../../../packages/memory-host-sdk/src/host/types.js";
|
||||
|
||||
Reference in New Issue
Block a user