fix(auth): reclaim stale file locks

This commit is contained in:
Peter Steinberger
2026-05-14 06:31:41 +01:00
parent babd25c6b7
commit ceb3092493
3 changed files with 190 additions and 13 deletions

View File

@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
- Plugins: discover provider plugins from `setup.providers[].envVars` credentials during provider discovery while keeping the deprecated `providerAuthEnvVars` fallback. (#81542) Thanks @JARVIS-Glasses.
- Docs/Codex harness: clarify that per-agent `CODEX_HOME` isolates `~/.codex` while inherited `HOME` intentionally keeps `.agents` discovery and subprocess user-home state available.
- CLI/plugins: keep bare plugin and parent-command help on the lightweight path, avoiding plugin registry discovery before rendering help.
- Auth: reclaim dead-owner stale file locks before retrying locked writes, so crashed OAuth refreshes no longer wedge `auth-profiles.json` until manual cleanup.
- CLI tables: preserve muted/color styling on wrapped continuation lines after multiline cells, keeping `openclaw plugins list` descriptions readable.
- Process execution: collapse case-insensitive duplicate child environment keys on Windows so caller-provided overrides such as `PATH` cannot be shadowed by host `Path`.
- Browser CLI: request the existing `operator.admin` gateway scope explicitly for browser control commands, avoiding unnecessary scope-upgrade approval loops. Fixes #81555. (#81716) Thanks @joshavant.

View File

@@ -5,6 +5,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import {
acquireFileLock,
drainFileLockStateForTest,
FILE_LOCK_STALE_ERROR_CODE,
FILE_LOCK_TIMEOUT_ERROR_CODE,
resetFileLockStateForTest,
} from "./file-lock.js";
@@ -55,6 +56,70 @@ describe("acquireFileLock", () => {
);
}, 5_000);
it("removes a reported stale lock when its owner pid is dead", async () => {
const filePath = path.join(tempDir, "auth-profiles.json");
const lockPath = `${filePath}.lock`;
const options = {
retries: {
retries: 0,
factor: 1,
minTimeout: 1,
maxTimeout: 1,
},
stale: 10,
} as const;
await fs.writeFile(
lockPath,
JSON.stringify({ pid: 999_999, createdAt: new Date(Date.now() - 60_000).toISOString() }),
"utf8",
);
const lock = await acquireFileLock(filePath, options);
try {
await expect(fs.realpath(lock.lockPath)).resolves.toBe(await fs.realpath(lockPath));
await expect(fs.readFile(lockPath, "utf8")).resolves.toContain(`"pid"`);
} finally {
await lock.release();
}
});
it("keeps a reported stale lock when its owner pid is alive", async () => {
const filePath = path.join(tempDir, "live-owner");
const lockPath = `${filePath}.lock`;
const options = {
retries: {
retries: 0,
factor: 1,
minTimeout: 1,
maxTimeout: 1,
},
stale: 10,
} as const;
await fs.writeFile(
lockPath,
JSON.stringify({ pid: process.pid, createdAt: new Date(Date.now() - 60_000).toISOString() }),
"utf8",
);
let caught: { lockPath?: string } | undefined;
await expect(
(async () => {
try {
await acquireFileLock(filePath, options);
} catch (err) {
caught = err as { lockPath?: string };
throw err;
}
})(),
).rejects.toMatchObject({
code: FILE_LOCK_STALE_ERROR_CODE,
});
await expect(fs.realpath(caught?.lockPath ?? "")).resolves.toBe(await fs.realpath(lockPath));
await expect(fs.readFile(lockPath, "utf8")).resolves.toContain(`"pid":${process.pid}`);
});
it("closes an opened lock handle when writing the owner payload fails", async () => {
const filePath = path.join(tempDir, "write-fails");
const writeError = new Error("owner write failed");

View File

@@ -1,4 +1,5 @@
import "../infra/fs-safe-defaults.js";
import fs from "node:fs/promises";
import {
acquireFileLock as acquireFsSafeFileLock,
drainFileLockManagerForTest,
@@ -28,14 +29,25 @@ export type FileLockHandle = {
};
export const FILE_LOCK_TIMEOUT_ERROR_CODE = "file_lock_timeout";
export const FILE_LOCK_STALE_ERROR_CODE = "file_lock_stale";
export type FileLockTimeoutError = Error & {
code: typeof FILE_LOCK_TIMEOUT_ERROR_CODE;
lockPath: string;
};
export type FileLockStaleError = Error & {
code: typeof FILE_LOCK_STALE_ERROR_CODE;
lockPath: string;
};
const FILE_LOCK_MANAGER_KEY = "openclaw.plugin-sdk.file-lock";
type LockFileSnapshot = {
raw: string;
payload: Record<string, unknown> | null;
};
function readLockPayload(value: Record<string, unknown> | null): LockFilePayload | null {
if (!value) {
return null;
@@ -63,13 +75,98 @@ async function shouldReclaimPluginLock(params: {
return true;
}
function normalizeTimeoutError(err: unknown): never {
function isFileLockError(error: unknown, code: string): boolean {
return (error as { code?: unknown } | null)?.code === code;
}
async function readLockFileSnapshot(lockPath: string): Promise<LockFileSnapshot | null> {
let raw: string;
try {
raw = await fs.readFile(lockPath, "utf8");
} catch (err) {
if ((err as NodeJS.ErrnoException).code === "ENOENT") {
return null;
}
throw err;
}
try {
const parsed = JSON.parse(raw) as unknown;
return {
raw,
payload:
parsed && typeof parsed === "object" && !Array.isArray(parsed)
? (parsed as Record<string, unknown>)
: null,
};
} catch {
return { raw, payload: null };
}
}
function shouldRemoveReportedStalePluginLock(params: {
payload: Record<string, unknown> | null;
staleMs: number;
nowMs: number;
}): boolean {
const payload = readLockPayload(params.payload);
if (payload?.pid) {
return !isPidAlive(payload.pid);
}
if (payload?.createdAt) {
const createdAt = Date.parse(payload.createdAt);
return !Number.isFinite(createdAt) || params.nowMs - createdAt > params.staleMs;
}
return true;
}
async function removeReportedStaleLockIfStillStale(params: {
lockPath: string;
staleMs: number;
}): Promise<boolean> {
const snapshot = await readLockFileSnapshot(params.lockPath);
if (!snapshot) {
return true;
}
if (
!shouldRemoveReportedStalePluginLock({
payload: snapshot.payload,
staleMs: params.staleMs,
nowMs: Date.now(),
})
) {
return false;
}
const current = await readLockFileSnapshot(params.lockPath);
if (!current) {
return true;
}
if (current.raw !== snapshot.raw) {
return false;
}
try {
await fs.unlink(params.lockPath);
return true;
} catch (err) {
return (err as NodeJS.ErrnoException).code === "ENOENT";
}
}
function normalizeLockError(err: unknown): never {
if ((err as { code?: unknown }).code === FILE_LOCK_TIMEOUT_ERROR_CODE) {
throw Object.assign(new Error((err as Error).message), {
code: FILE_LOCK_TIMEOUT_ERROR_CODE,
lockPath: (err as { lockPath?: string }).lockPath ?? "",
}) as FileLockTimeoutError;
}
if ((err as { code?: unknown }).code === FILE_LOCK_STALE_ERROR_CODE) {
throw Object.assign(new Error((err as Error).message), {
code: FILE_LOCK_STALE_ERROR_CODE,
lockPath: (err as { lockPath?: string }).lockPath ?? "",
}) as FileLockStaleError;
}
throw err;
}
@@ -86,18 +183,32 @@ export async function acquireFileLock(
filePath: string,
options: FileLockOptions,
): Promise<FileLockHandle> {
try {
const lock = await acquireFsSafeFileLock(filePath, {
managerKey: FILE_LOCK_MANAGER_KEY,
staleMs: options.stale,
retry: options.retries,
allowReentrant: true,
payload: () => ({ pid: process.pid, createdAt: new Date().toISOString() }),
shouldReclaim: shouldReclaimPluginLock,
});
return { lockPath: lock.lockPath, release: lock.release };
} catch (err) {
return normalizeTimeoutError(err);
while (true) {
try {
const lock = await acquireFsSafeFileLock(filePath, {
managerKey: FILE_LOCK_MANAGER_KEY,
staleMs: options.stale,
retry: options.retries,
allowReentrant: true,
payload: () => ({ pid: process.pid, createdAt: new Date().toISOString() }),
shouldReclaim: shouldReclaimPluginLock,
});
return { lockPath: lock.lockPath, release: lock.release };
} catch (err) {
if (isFileLockError(err, FILE_LOCK_STALE_ERROR_CODE)) {
const lockPath = (err as { lockPath?: string }).lockPath;
if (
lockPath &&
(await removeReportedStaleLockIfStillStale({
lockPath,
staleMs: options.stale,
}))
) {
continue;
}
}
return normalizeLockError(err);
}
}
}