From f3fd0eedff215967eb75361d241dd5e6cea602e8 Mon Sep 17 00:00:00 2001 From: kunpeng-ai-lab <1370321215@qq.com> Date: Sat, 2 May 2026 18:07:48 +0800 Subject: [PATCH] fix(memory): retry transient index swaps on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #64187. Adds bounded retry handling for transient Windows rename failures (`EBUSY`, `EPERM`, `EACCES`) during memory-core SQLite atomic reindex swaps. Keeps missing optional SQLite sidecars ignored and non-transient rename failures fail-fast. Verification: - PR CI green, including `check`, `check-additional`, `checks-node-core`, `build-smoke`, and security fast checks - Contributor local proof: `pnpm exec vitest run extensions/memory-core/src/memory/manager.atomic-reindex.test.ts` - Contributor local proof: `pnpm lint:extensions -- extensions/memory-core/src/memory/manager-atomic-reindex.ts extensions/memory-core/src/memory/manager.atomic-reindex.test.ts` - Contributor local proof: `pnpm check:changed` Co-authored-by: 鲲鹏AI探索局 --- CHANGELOG.md | 1 + .../src/memory/manager-atomic-reindex.ts | 75 ++++++++++++++++--- .../src/memory/manager.atomic-reindex.test.ts | 74 +++++++++++++++++- 3 files changed, 138 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 04591c6fcfb..747e86b9a94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai - Music generation: raise too-small tool timeouts to the provider-safe 10-second floor and collapse cascading abort fallback errors into a clearer root-cause summary. Thanks @shakkernerd. - Memory-core/dreaming: include the primary runtime workspace in multi-agent dreaming sweeps without mixing main-agent session transcripts into configured subagent workspaces. Fixes #70014. Thanks @ttomiczek. - Control UI: add tab/RPC timing attribution and decouple slow Overview/Cron secondary refreshes so Sessions navigation gets immediate visible feedback. Refs #64004. 
Thanks @WaMaSeDu. +- Memory: retry transient SQLite index file swaps during atomic reindex on Windows, so brief `EBUSY`, `EPERM`, or `EACCES` locks do not fail memory rebuilds. Fixes #64187. Thanks @kunpeng-ai-lab. - Telegram/startup: use the existing `getMe` request guard for the gateway bot probe instead of a fixed 2.5-second budget, and honor higher `timeoutSeconds` configs for slow Telegram API paths. Fixes #75783. Thanks @tankotan. - Telegram/models: make model picker confirmations say selections are session-scoped and do not change the agent's persistent default. Fixes #75965. Thanks @sd1114820. - Control UI/slash commands: keep fallback command metadata on a browser-safe registry path, so provider thinking runtime imports cannot blank the Web UI with `process is not defined`. Fixes #75987. Thanks @novkien. diff --git a/extensions/memory-core/src/memory/manager-atomic-reindex.ts b/extensions/memory-core/src/memory/manager-atomic-reindex.ts index 9fac4998d80..c7f8026e997 100644 --- a/extensions/memory-core/src/memory/manager-atomic-reindex.ts +++ b/extensions/memory-core/src/memory/manager-atomic-reindex.ts @@ -1,24 +1,79 @@ import { randomUUID } from "node:crypto"; import fs from "node:fs/promises"; +import { setTimeout as sleep } from "node:timers/promises"; -async function moveMemoryIndexFiles(sourceBase: string, targetBase: string): Promise<void> { +type MemoryIndexFileOps = { + rename: typeof fs.rename; + rm: typeof fs.rm; + wait: (ms: number) => Promise<void>; +}; + +type MoveMemoryIndexFilesOptions = { + fileOps?: MemoryIndexFileOps; + maxRenameAttempts?: number; + renameRetryDelayMs?: number; +}; + +const defaultFileOps: MemoryIndexFileOps = { + rename: fs.rename, + rm: fs.rm, + wait: sleep, +}; + +const transientRenameErrorCodes = new Set(["EBUSY", "EPERM", "EACCES"]); +const defaultMaxRenameAttempts = 6; +const defaultRenameRetryDelayMs = 25; + +function isTransientRenameError(err: unknown): boolean { + return transientRenameErrorCodes.has((err as 
NodeJS.ErrnoException).code ?? ""); +} + +async function renameWithRetry( + source: string, + target: string, + options: Required<MoveMemoryIndexFilesOptions>, +): Promise<void> { + for (let attempt = 1; attempt <= options.maxRenameAttempts; attempt++) { + try { + await options.fileOps.rename(source, target); + return; + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + return; + } + if (!isTransientRenameError(err) || attempt === options.maxRenameAttempts) { + throw err; + } + await options.fileOps.wait(options.renameRetryDelayMs * attempt); + } + } + throw new Error("rename retry loop exited unexpectedly"); +} + +export async function moveMemoryIndexFiles( + sourceBase: string, + targetBase: string, + options: MoveMemoryIndexFilesOptions = {}, +): Promise<void> { + const resolvedOptions: Required<MoveMemoryIndexFilesOptions> = { + fileOps: options.fileOps ?? defaultFileOps, + maxRenameAttempts: Math.max(1, options.maxRenameAttempts ?? defaultMaxRenameAttempts), + renameRetryDelayMs: options.renameRetryDelayMs ?? defaultRenameRetryDelayMs, + }; const suffixes = ["", "-wal", "-shm"]; for (const suffix of suffixes) { const source = `${sourceBase}${suffix}`; const target = `${targetBase}${suffix}`; - try { - await fs.rename(source, target); - } catch (err) { - if ((err as NodeJS.ErrnoException).code !== "ENOENT") { - throw err; - } - } + await renameWithRetry(source, target, resolvedOptions); } } -async function removeMemoryIndexFiles(basePath: string): Promise<void> { +async function removeMemoryIndexFiles( + basePath: string, + fileOps: MemoryIndexFileOps = defaultFileOps, +): Promise<void> { const suffixes = ["", "-wal", "-shm"]; - await Promise.all(suffixes.map((suffix) => fs.rm(`${basePath}${suffix}`, { force: true }))); + await Promise.all(suffixes.map((suffix) => fileOps.rm(`${basePath}${suffix}`, { force: true }))); } async function swapMemoryIndexFiles(targetPath: string, tempPath: string): Promise<void> { diff --git a/extensions/memory-core/src/memory/manager.atomic-reindex.test.ts 
b/extensions/memory-core/src/memory/manager.atomic-reindex.test.ts index 263290ed0c8..18c192e96af 100644 --- a/extensions/memory-core/src/memory/manager.atomic-reindex.test.ts +++ b/extensions/memory-core/src/memory/manager.atomic-reindex.test.ts @@ -2,8 +2,8 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { DatabaseSync } from "node:sqlite"; -import { afterAll, beforeAll, beforeEach, describe, expect, it } from "vitest"; -import { runMemoryAtomicReindex } from "./manager-atomic-reindex.js"; +import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; +import { moveMemoryIndexFiles, runMemoryAtomicReindex } from "./manager-atomic-reindex.js"; describe("memory manager atomic reindex", () => { let fixtureRoot = ""; @@ -57,6 +57,76 @@ describe("memory manager atomic reindex", () => { expect(readChunkMarker(indexPath)).toBe("after"); await expect(fs.access(tempIndexPath)).rejects.toThrow(); }); + + it("retries transient rename failures during index swaps", async () => { + const rename = vi + .fn() + .mockRejectedValueOnce(Object.assign(new Error("busy"), { code: "EBUSY" })) + .mockResolvedValue(undefined); + const wait = vi.fn().mockResolvedValue(undefined); + + await moveMemoryIndexFiles("index.sqlite.tmp", "index.sqlite", { + fileOps: { rename, rm: fs.rm, wait }, + maxRenameAttempts: 3, + renameRetryDelayMs: 10, + }); + + expect(rename).toHaveBeenCalledTimes(4); + expect(wait).toHaveBeenCalledTimes(1); + expect(wait).toHaveBeenCalledWith(10); + }); + + it("throws after retrying transient rename failures up to the attempt limit", async () => { + const rename = vi.fn().mockRejectedValue(Object.assign(new Error("busy"), { code: "EBUSY" })); + const wait = vi.fn().mockResolvedValue(undefined); + + await expect( + moveMemoryIndexFiles("index.sqlite.tmp", "index.sqlite", { + fileOps: { rename, rm: fs.rm, wait }, + maxRenameAttempts: 3, + renameRetryDelayMs: 10, + }), + ).rejects.toMatchObject({ 
code: "EBUSY" }); + + expect(rename).toHaveBeenCalledTimes(3); + expect(wait).toHaveBeenCalledTimes(2); + expect(wait).toHaveBeenNthCalledWith(1, 10); + expect(wait).toHaveBeenNthCalledWith(2, 20); + }); + + it("does not retry missing optional sqlite sidecar files", async () => { + const rename = vi + .fn() + .mockResolvedValueOnce(undefined) + .mockRejectedValueOnce(Object.assign(new Error("missing wal"), { code: "ENOENT" })) + .mockRejectedValueOnce(Object.assign(new Error("missing shm"), { code: "ENOENT" })); + const wait = vi.fn().mockResolvedValue(undefined); + + await moveMemoryIndexFiles("index.sqlite.tmp", "index.sqlite", { + fileOps: { rename, rm: fs.rm, wait }, + maxRenameAttempts: 3, + renameRetryDelayMs: 10, + }); + + expect(rename).toHaveBeenCalledTimes(3); + expect(wait).not.toHaveBeenCalled(); + }); + + it("does not retry non-transient rename failures", async () => { + const rename = vi.fn().mockRejectedValue(Object.assign(new Error("invalid"), { code: "EINVAL" })); + const wait = vi.fn().mockResolvedValue(undefined); + + await expect( + moveMemoryIndexFiles("index.sqlite.tmp", "index.sqlite", { + fileOps: { rename, rm: fs.rm, wait }, + maxRenameAttempts: 3, + renameRetryDelayMs: 10, + }), + ).rejects.toMatchObject({ code: "EINVAL" }); + + expect(rename).toHaveBeenCalledTimes(1); + expect(wait).not.toHaveBeenCalled(); + }); }); function writeChunkMarker(dbPath: string, marker: string): void {