diff --git a/CHANGELOG.md b/CHANGELOG.md index dc4c42437d5..00d9d85dab8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai - Plugins: discover provider plugins from `setup.providers[].envVars` credentials during provider discovery while keeping the deprecated `providerAuthEnvVars` fallback. (#81542) Thanks @JARVIS-Glasses. - Docs/Codex harness: clarify that per-agent `CODEX_HOME` isolates `~/.codex` while inherited `HOME` intentionally keeps `.agents` discovery and subprocess user-home state available. - CLI/plugins: keep bare plugin and parent-command help on the lightweight path, avoiding plugin registry discovery before rendering help. +- Auth: reclaim dead-owner stale file locks before retrying locked writes, so crashed OAuth refreshes no longer wedge `auth-profiles.json` until manual cleanup. - CLI tables: preserve muted/color styling on wrapped continuation lines after multiline cells, keeping `openclaw plugins list` descriptions readable. - Process execution: collapse case-insensitive duplicate child environment keys on Windows so caller-provided overrides such as `PATH` cannot be shadowed by host `Path`. - Browser CLI: request the existing `operator.admin` gateway scope explicitly for browser control commands, avoiding unnecessary scope-upgrade approval loops. Fixes #81555. (#81716) Thanks @joshavant. diff --git a/src/plugin-sdk/file-lock.test.ts b/src/plugin-sdk/file-lock.test.ts index 60494e4dd80..1c31d6e0832 100644 --- a/src/plugin-sdk/file-lock.test.ts +++ b/src/plugin-sdk/file-lock.test.ts @@ -5,6 +5,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { acquireFileLock, drainFileLockStateForTest, + FILE_LOCK_STALE_ERROR_CODE, FILE_LOCK_TIMEOUT_ERROR_CODE, resetFileLockStateForTest, } from "./file-lock.js"; @@ -55,6 +56,70 @@ describe("acquireFileLock", () => { ); }, 5_000); + it("removes a reported stale lock when its owner pid is dead", async () => { + const filePath = path.join(tempDir, "auth-profiles.json"); + const lockPath = `${filePath}.lock`; + const options = { + retries: { + retries: 0, + factor: 1, + minTimeout: 1, + maxTimeout: 1, + }, + stale: 10, + } as const; + + await fs.writeFile( + lockPath, + JSON.stringify({ pid: 999_999, createdAt: new Date(Date.now() - 60_000).toISOString() }), + "utf8", + ); + + const lock = await acquireFileLock(filePath, options); + try { + await expect(fs.realpath(lock.lockPath)).resolves.toBe(await fs.realpath(lockPath)); + await expect(fs.readFile(lockPath, "utf8")).resolves.toContain(`"pid"`); + } finally { + await lock.release(); + } + }); + + it("keeps a reported stale lock when its owner pid is alive", async () => { + const filePath = path.join(tempDir, "live-owner"); + const lockPath = `${filePath}.lock`; + const options = { + retries: { + retries: 0, + factor: 1, + minTimeout: 1, + maxTimeout: 1, + }, + stale: 10, + } as const; + + await fs.writeFile( + lockPath, + JSON.stringify({ pid: process.pid, createdAt: new Date(Date.now() - 60_000).toISOString() }), + "utf8", + ); + + let caught: { lockPath?: string } | undefined; + await expect( + (async () => { + try { + await acquireFileLock(filePath, options); + } catch (err) { + caught = err as { lockPath?: string }; + throw err; + } + })(), + ).rejects.toMatchObject({ + code: FILE_LOCK_STALE_ERROR_CODE, + }); + await expect(fs.realpath(caught?.lockPath ?? "")).resolves.toBe(await fs.realpath(lockPath)); + await expect(fs.readFile(lockPath, "utf8")).resolves.toContain(`"pid":${process.pid}`); + }); + it("closes an opened lock handle when writing the owner payload fails", async () => { const filePath = path.join(tempDir, "write-fails"); const writeError = new Error("owner write failed"); diff --git a/src/plugin-sdk/file-lock.ts b/src/plugin-sdk/file-lock.ts index 11130538957..a33780a7da5 100644 --- a/src/plugin-sdk/file-lock.ts +++ b/src/plugin-sdk/file-lock.ts @@ -1,4 +1,5 @@ import "../infra/fs-safe-defaults.js"; +import fs from "node:fs/promises"; import { acquireFileLock as acquireFsSafeFileLock, drainFileLockManagerForTest, @@ -28,14 +29,25 @@ export type FileLockHandle = { }; export const FILE_LOCK_TIMEOUT_ERROR_CODE = "file_lock_timeout"; +export const FILE_LOCK_STALE_ERROR_CODE = "file_lock_stale"; export type FileLockTimeoutError = Error & { code: typeof FILE_LOCK_TIMEOUT_ERROR_CODE; lockPath: string; }; +export type FileLockStaleError = Error & { + code: typeof FILE_LOCK_STALE_ERROR_CODE; + lockPath: string; +}; + const FILE_LOCK_MANAGER_KEY = "openclaw.plugin-sdk.file-lock"; +type LockFileSnapshot = { + raw: string; + payload: Record | null; +}; + function readLockPayload(value: Record | null): LockFilePayload | null { if (!value) { return null; @@ -63,13 +75,98 @@ async function shouldReclaimPluginLock(params: { return true; } -function normalizeTimeoutError(err: unknown): never { +function isFileLockError(error: unknown, code: string): boolean { + return (error as { code?: unknown } | null)?.code === code; +} + +async function readLockFileSnapshot(lockPath: string): Promise { + let raw: string; + try { + raw = await fs.readFile(lockPath, "utf8"); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + return null; + } + throw err; + } + + try { + const parsed = JSON.parse(raw) as unknown; + return { + raw, + payload: + parsed && typeof parsed === "object" && !Array.isArray(parsed) + ? (parsed as Record) + : null, + }; + } catch { + return { raw, payload: null }; + } +} + +function shouldRemoveReportedStalePluginLock(params: { + payload: Record | null; + staleMs: number; + nowMs: number; +}): boolean { + const payload = readLockPayload(params.payload); + if (payload?.pid) { + return !isPidAlive(payload.pid); + } + if (payload?.createdAt) { + const createdAt = Date.parse(payload.createdAt); + return !Number.isFinite(createdAt) || params.nowMs - createdAt > params.staleMs; + } + return true; +} + +async function removeReportedStaleLockIfStillStale(params: { + lockPath: string; + staleMs: number; +}): Promise { + const snapshot = await readLockFileSnapshot(params.lockPath); + if (!snapshot) { + return true; + } + if ( + !shouldRemoveReportedStalePluginLock({ + payload: snapshot.payload, + staleMs: params.staleMs, + nowMs: Date.now(), + }) + ) { + return false; + } + + const current = await readLockFileSnapshot(params.lockPath); + if (!current) { + return true; + } + if (current.raw !== snapshot.raw) { + return false; + } + + try { + await fs.unlink(params.lockPath); + return true; + } catch (err) { + return (err as NodeJS.ErrnoException).code === "ENOENT"; + } +} + +function normalizeLockError(err: unknown): never { if ((err as { code?: unknown }).code === FILE_LOCK_TIMEOUT_ERROR_CODE) { throw Object.assign(new Error((err as Error).message), { code: FILE_LOCK_TIMEOUT_ERROR_CODE, lockPath: (err as { lockPath?: string }).lockPath ?? "", }) as FileLockTimeoutError; } + if ((err as { code?: unknown }).code === FILE_LOCK_STALE_ERROR_CODE) { + throw Object.assign(new Error((err as Error).message), { + code: FILE_LOCK_STALE_ERROR_CODE, + lockPath: (err as { lockPath?: string }).lockPath ?? "", + }) as FileLockStaleError; + } throw err; } @@ -86,18 +183,32 @@ export async function acquireFileLock( filePath: string, options: FileLockOptions, ): Promise { - try { - const lock = await acquireFsSafeFileLock(filePath, { - managerKey: FILE_LOCK_MANAGER_KEY, - staleMs: options.stale, - retry: options.retries, - allowReentrant: true, - payload: () => ({ pid: process.pid, createdAt: new Date().toISOString() }), - shouldReclaim: shouldReclaimPluginLock, - }); - return { lockPath: lock.lockPath, release: lock.release }; - } catch (err) { - return normalizeTimeoutError(err); + while (true) { + try { + const lock = await acquireFsSafeFileLock(filePath, { + managerKey: FILE_LOCK_MANAGER_KEY, + staleMs: options.stale, + retry: options.retries, + allowReentrant: true, + payload: () => ({ pid: process.pid, createdAt: new Date().toISOString() }), + shouldReclaim: shouldReclaimPluginLock, + }); + return { lockPath: lock.lockPath, release: lock.release }; + } catch (err) { + if (isFileLockError(err, FILE_LOCK_STALE_ERROR_CODE)) { + const lockPath = (err as { lockPath?: string }).lockPath; + if ( + lockPath && + (await removeReportedStaleLockIfStillStale({ + lockPath, + staleMs: options.stale, + })) + ) { + continue; + } + } + return normalizeLockError(err); + } } }