diff --git a/CHANGELOG.md b/CHANGELOG.md index 6959abd2737..f7d026c773b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,9 @@ Docs: https://docs.openclaw.ai ### Fixes +- Plugins/startup: remove ownerless bundled runtime-dependency install locks + after a short grace window and include lock owner details when startup times + out waiting for a plugin runtime-deps lock. Thanks @steipete. - Agents/replies: forward sanitized underlying agent failure details on external channels instead of replacing unknown failures with a generic retry message. Thanks @steipete. diff --git a/src/plugins/bundled-runtime-deps.test.ts b/src/plugins/bundled-runtime-deps.test.ts index a51ccb3350c..269bc557442 100644 --- a/src/plugins/bundled-runtime-deps.test.ts +++ b/src/plugins/bundled-runtime-deps.test.ts @@ -766,6 +766,72 @@ describe("ensureBundledPluginRuntimeDeps", () => { ).toBe(false); }); + it("does not expire fresh ownerless runtime-deps install locks", () => { + expect( + bundledRuntimeDepsTesting.shouldRemoveRuntimeDepsLock( + { lockDirMtimeMs: 1_000 }, + 31_000, + () => true, + ), + ).toBe(false); + }); + + it("does not expire ownerless runtime-deps install locks when the owner file changed recently", () => { + expect( + bundledRuntimeDepsTesting.shouldRemoveRuntimeDepsLock( + { lockDirMtimeMs: 1_000, ownerFileMtimeMs: 31_000 }, + 61_000, + () => true, + ), + ).toBe(false); + }); + + it("expires ownerless runtime-deps install locks after the owner write grace window", () => { + expect( + bundledRuntimeDepsTesting.shouldRemoveRuntimeDepsLock( + { lockDirMtimeMs: 1_000 }, + 31_001, + () => true, + ), + ).toBe(true); + }); + + it("expires ownerless runtime-deps install locks when lock and owner file are stale", () => { + expect( + bundledRuntimeDepsTesting.shouldRemoveRuntimeDepsLock( + { lockDirMtimeMs: 1_000, ownerFileMtimeMs: 2_000 }, + 32_001, + () => true, + ), + ).toBe(true); + }); + + it("includes runtime-deps lock owner details in timeout messages", () => { + const message = bundledRuntimeDepsTesting.formatRuntimeDepsLockTimeoutMessage({ + lockDir: "/tmp/openclaw-plugin/.openclaw-runtime-deps.lock", + owner: { + pid: 0, + createdAtMs: 1_000, + ownerFileState: "invalid", + ownerFilePath: "/tmp/openclaw-plugin/.openclaw-runtime-deps.lock/owner.json", + ownerFileMtimeMs: 2_500, + ownerFileIsSymlink: true, + lockDirMtimeMs: 2_000, + }, + waitedMs: 300_123, + nowMs: 303_000, + }); + + expect(message).toContain("waited=300123ms"); + expect(message).toContain("ownerFile=invalid"); + expect(message).toContain("ownerFileSymlink=true"); + expect(message).toContain("pid=0 alive=false"); + expect(message).toContain("ownerAge=302000ms"); + expect(message).toContain("ownerFileAge=300500ms"); + expect(message).toContain("lockAge=301000ms"); + expect(message).toContain(".openclaw-runtime-deps.lock/owner.json"); + }); + it("removes stale runtime-deps install locks before repairing deps", () => { const packageRoot = makeTempDir(); const pluginRoot = path.join(packageRoot, "dist", "extensions", "openai"); @@ -808,6 +874,100 @@ describe("ensureBundledPluginRuntimeDeps", () => { expect(fs.existsSync(lockDir)).toBe(false); }); + it("removes stale malformed runtime-deps install locks before repairing deps", () => { + const packageRoot = makeTempDir(); + const pluginRoot = path.join(packageRoot, "dist", "extensions", "browser"); + fs.mkdirSync(pluginRoot, { recursive: true }); + fs.writeFileSync( + path.join(pluginRoot, "package.json"), + JSON.stringify({ + dependencies: { + "browser-runtime": "1.0.0", + }, + }), + ); + const installRoot = resolveBundledRuntimeDependencyInstallRoot(pluginRoot, { env: {} }); + const lockDir = path.join(installRoot, ".openclaw-runtime-deps.lock"); + fs.mkdirSync(lockDir, { recursive: true }); + const ownerPath = path.join(lockDir, "owner.json"); + fs.writeFileSync(ownerPath, "{", "utf8"); + fs.utimesSync(ownerPath, new Date(0), new Date(0)); + fs.utimesSync(lockDir, new Date(0), new Date(0)); + + const calls: BundledRuntimeDepsInstallParams[] = []; + const result = ensureBundledPluginRuntimeDeps({ + env: {}, + installDeps: (params) => { + calls.push(params); + fs.mkdirSync(path.join(params.installRoot, "node_modules", "browser-runtime"), { + recursive: true, + }); + fs.writeFileSync( + path.join(params.installRoot, "node_modules", "browser-runtime", "package.json"), + JSON.stringify({ name: "browser-runtime", version: "1.0.0" }), + ); + }, + pluginId: "browser", + pluginRoot, + }); + + expect(result).toEqual({ + installedSpecs: ["browser-runtime@1.0.0"], + retainSpecs: ["browser-runtime@1.0.0"], + }); + expect(calls).toHaveLength(1); + expect(fs.existsSync(lockDir)).toBe(false); + }); + + const itSupportsSymlinks = process.platform === "win32" ? it.skip : it; + itSupportsSymlinks( + "removes stale runtime-deps install locks with broken owner symlinks before repairing deps", + () => { + const packageRoot = makeTempDir(); + const pluginRoot = path.join(packageRoot, "dist-runtime", "extensions", "browser"); + fs.mkdirSync(pluginRoot, { recursive: true }); + fs.writeFileSync( + path.join(pluginRoot, "package.json"), + JSON.stringify({ + dependencies: { + "browser-runtime": "1.0.0", + }, + }), + ); + const installRoot = resolveBundledRuntimeDependencyInstallRoot(pluginRoot, { env: {} }); + const lockDir = path.join(installRoot, ".openclaw-runtime-deps.lock"); + fs.mkdirSync(lockDir, { recursive: true }); + const ownerPath = path.join(lockDir, "owner.json"); + fs.symlinkSync("../missing-owner.json", ownerPath); + fs.lutimesSync(ownerPath, new Date(0), new Date(0)); + fs.utimesSync(lockDir, new Date(0), new Date(0)); + + const calls: BundledRuntimeDepsInstallParams[] = []; + const result = ensureBundledPluginRuntimeDeps({ + env: {}, + installDeps: (params) => { + calls.push(params); + fs.mkdirSync(path.join(params.installRoot, "node_modules", "browser-runtime"), { + recursive: true, + }); + fs.writeFileSync( + path.join(params.installRoot, "node_modules", "browser-runtime", "package.json"), + JSON.stringify({ name: "browser-runtime", version: "1.0.0" }), + ); + }, + pluginId: "browser", + pluginRoot, + }); + + expect(result).toEqual({ + installedSpecs: ["browser-runtime@1.0.0"], + retainSpecs: ["browser-runtime@1.0.0"], + }); + expect(calls).toHaveLength(1); + expect(fs.existsSync(lockDir)).toBe(false); + }, + ); + it("does not install when runtime deps are only workspace links", () => { const packageRoot = makeTempDir(); const extensionsRoot = path.join(packageRoot, "dist", "extensions"); diff --git a/src/plugins/bundled-runtime-deps.ts b/src/plugins/bundled-runtime-deps.ts index facef964ab9..32e59f67306 100644 --- a/src/plugins/bundled-runtime-deps.ts +++ b/src/plugins/bundled-runtime-deps.ts @@ -54,6 +54,7 @@ const BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE = "owner.json"; const BUNDLED_RUNTIME_DEPS_LOCK_WAIT_MS = 100; const BUNDLED_RUNTIME_DEPS_LOCK_TIMEOUT_MS = 5 * 60_000; const BUNDLED_RUNTIME_DEPS_LOCK_STALE_MS = 10 * 60_000; +const BUNDLED_RUNTIME_DEPS_OWNERLESS_LOCK_STALE_MS = 30_000; export type BundledRuntimeDepsNpmRunner = { command: string; @@ -186,21 +187,80 @@ function isProcessAlive(pid: number): boolean { try { process.kill(pid, 0); return true; - } catch { - return false; + } catch (error) { + return (error as NodeJS.ErrnoException).code === "EPERM"; } } -function readRuntimeDepsLockOwner(lockDir: string): { pid?: number; createdAtMs?: number } { - const owner = readJsonObject(path.join(lockDir, BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE)); +type RuntimeDepsLockOwner = { + pid?: number; + createdAtMs?: number; + ownerFileState: "ok" | "missing" | "invalid"; + ownerFilePath: string; + ownerFileMtimeMs?: number; + ownerFileIsSymlink?: boolean; + lockDirMtimeMs?: number; +}; + +function readRuntimeDepsLockOwner(lockDir: string): RuntimeDepsLockOwner { + const ownerFilePath = path.join(lockDir, BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE); + let owner: JsonObject | null = null; + let ownerFileState: RuntimeDepsLockOwner["ownerFileState"] = "missing"; + let ownerFileMtimeMs: number | undefined; + let ownerFileIsSymlink: boolean | undefined; + try { + const ownerFileStat = fs.lstatSync(ownerFilePath); + ownerFileMtimeMs = ownerFileStat.mtimeMs; + ownerFileIsSymlink = ownerFileStat.isSymbolicLink(); + } catch { + // The owner file may not exist yet, or may have been removed by the lock owner. + } + try { + const parsed = JSON.parse(fs.readFileSync(ownerFilePath, "utf8")) as unknown; + if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) { + owner = parsed as JsonObject; + ownerFileState = "ok"; + } else { + ownerFileState = "invalid"; + } + } catch (error) { + ownerFileState = + (error as NodeJS.ErrnoException).code === "ENOENT" && ownerFileMtimeMs === undefined + ? "missing" + : "invalid"; + } + let lockDirMtimeMs: number | undefined; + try { + lockDirMtimeMs = fs.statSync(lockDir).mtimeMs; + } catch { + // The lock may have disappeared between the mkdir failure and diagnostics. + } return { pid: typeof owner?.pid === "number" ? owner.pid : undefined, createdAtMs: typeof owner?.createdAtMs === "number" ? owner.createdAtMs : undefined, + ownerFileState, + ownerFilePath, + ownerFileMtimeMs, + ownerFileIsSymlink, + lockDirMtimeMs, }; } +function latestFiniteMs(values: readonly (number | undefined)[]): number | undefined { + let latest: number | undefined; + for (const value of values) { + if (typeof value !== "number" || !Number.isFinite(value)) { + continue; + } + if (latest === undefined || value > latest) { + latest = value; + } + } + return latest; +} + function shouldRemoveRuntimeDepsLock( - owner: { pid?: number; createdAtMs?: number }, + owner: Pick, nowMs: number, isAlive: (pid: number) => boolean = isProcessAlive, ): boolean { @@ -208,13 +268,55 @@ function shouldRemoveRuntimeDepsLock( return !isAlive(owner.pid); } + if (typeof owner.createdAtMs === "number") { + return nowMs - owner.createdAtMs > BUNDLED_RUNTIME_DEPS_LOCK_STALE_MS; + } + + const ownerlessObservedAtMs = latestFiniteMs([owner.lockDirMtimeMs, owner.ownerFileMtimeMs]); return ( - typeof owner.createdAtMs === "number" && - nowMs - owner.createdAtMs > BUNDLED_RUNTIME_DEPS_LOCK_STALE_MS + typeof ownerlessObservedAtMs === "number" && + nowMs - ownerlessObservedAtMs > BUNDLED_RUNTIME_DEPS_OWNERLESS_LOCK_STALE_MS + ); +} + +function formatDurationMs(ms: number | undefined): string { + return typeof ms === "number" && Number.isFinite(ms) ? `${Math.max(0, Math.round(ms))}ms` : "n/a"; +} + +function formatRuntimeDepsLockTimeoutMessage(params: { + lockDir: string; + owner: RuntimeDepsLockOwner; + waitedMs: number; + nowMs: number; +}): string { + const ownerAgeMs = + typeof params.owner.createdAtMs === "number" + ? params.nowMs - params.owner.createdAtMs + : undefined; + const lockAgeMs = + typeof params.owner.lockDirMtimeMs === "number" + ? params.nowMs - params.owner.lockDirMtimeMs + : undefined; + const ownerFileAgeMs = + typeof params.owner.ownerFileMtimeMs === "number" + ? params.nowMs - params.owner.ownerFileMtimeMs + : undefined; + const pidDetail = + typeof params.owner.pid === "number" + ? `pid=${params.owner.pid} alive=${isProcessAlive(params.owner.pid)}` + : "pid=missing"; + const ownerFileSymlink = + typeof params.owner.ownerFileIsSymlink === "boolean" ? params.owner.ownerFileIsSymlink : "n/a"; + return ( + `Timed out waiting for bundled runtime deps lock at ${params.lockDir} ` + + `(waited=${formatDurationMs(params.waitedMs)}, ownerFile=${params.owner.ownerFileState}, ownerFileSymlink=${ownerFileSymlink}, ` + + `${pidDetail}, ownerAge=${formatDurationMs(ownerAgeMs)}, ownerFileAge=${formatDurationMs(ownerFileAgeMs)}, lockAge=${formatDurationMs(lockAgeMs)}, ` + + `ownerFilePath=${params.owner.ownerFilePath}). If no OpenClaw/npm install is running, remove the lock directory and retry.` ); } export const __testing = { + formatRuntimeDepsLockTimeoutMessage, shouldRemoveRuntimeDepsLock, }; @@ -240,11 +342,16 @@ function withBundledRuntimeDepsInstallRootLock(installRoot: string, run: () = while (!locked) { try { fs.mkdirSync(lockDir); - fs.writeFileSync( - path.join(lockDir, BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE), - `${JSON.stringify({ pid: process.pid, createdAtMs: Date.now() }, null, 2)}\n`, - "utf8", - ); + try { + fs.writeFileSync( + path.join(lockDir, BUNDLED_RUNTIME_DEPS_LOCK_OWNER_FILE), + `${JSON.stringify({ pid: process.pid, createdAtMs: Date.now() }, null, 2)}\n`, + "utf8", + ); + } catch (ownerWriteError) { + fs.rmSync(lockDir, { recursive: true, force: true }); + throw ownerWriteError; + } locked = true; } catch (error) { const code = (error as NodeJS.ErrnoException).code; @@ -252,10 +359,19 @@ function withBundledRuntimeDepsInstallRootLock(installRoot: string, run: () = throw error; } removeRuntimeDepsLockIfStale(lockDir, Date.now()); - if (Date.now() - startedAt > BUNDLED_RUNTIME_DEPS_LOCK_TIMEOUT_MS) { - throw new Error(`Timed out waiting for bundled runtime deps lock at ${lockDir}`, { - cause: error, - }); + const nowMs = Date.now(); + if (nowMs - startedAt > BUNDLED_RUNTIME_DEPS_LOCK_TIMEOUT_MS) { + throw new Error( + formatRuntimeDepsLockTimeoutMessage({ + lockDir, + owner: readRuntimeDepsLockOwner(lockDir), + waitedMs: nowMs - startedAt, + nowMs, + }), + { + cause: error, + }, + ); } sleepSync(BUNDLED_RUNTIME_DEPS_LOCK_WAIT_MS); }