fix(agents): reclaim untracked self-owned session locks

This commit is contained in:
Cedric
2026-05-01 14:23:56 -07:00
committed by Ayaan Zaidi
parent 1131d186b9
commit 233e0b1b32
2 changed files with 113 additions and 23 deletions

View File

@@ -401,6 +401,43 @@ describe("acquireSessionWriteLock", () => {
}
});
it("cleans untracked current-process .jsonl lock files with matching starttime", async () => {
const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-"));
const sessionsDir = path.join(root, "sessions");
await fs.mkdir(sessionsDir, { recursive: true });
const nowMs = Date.now();
const orphanSelfLock = path.join(sessionsDir, "orphan-self.jsonl.lock");
try {
await fs.writeFile(
orphanSelfLock,
JSON.stringify({
pid: process.pid,
createdAt: new Date(nowMs).toISOString(),
starttime: FAKE_STARTTIME,
}),
"utf8",
);
const result = await cleanStaleLockFiles({
sessionsDir,
staleMs: 30_000,
nowMs,
removeStale: true,
});
expect(result.locks).toHaveLength(1);
expect(result.cleaned.map((entry) => path.basename(entry.lockPath))).toEqual([
"orphan-self.jsonl.lock",
]);
expect(result.cleaned[0]?.staleReasons).toContain("orphan-self-pid");
await expect(fs.access(orphanSelfLock)).rejects.toThrow();
} finally {
await fs.rm(root, { recursive: true, force: true });
}
});
it("removes held locks on termination signals", async () => {
const signals = ["SIGINT", "SIGTERM", "SIGQUIT", "SIGABRT"] as const;
const originalKill = process.kill.bind(process);
@@ -456,6 +493,14 @@ describe("acquireSessionWriteLock", () => {
});
});
it("reclaims untracked current-process lock files with matching starttime", async () => {
await withTempSessionLockFile(async ({ sessionFile, lockPath }) => {
await writeCurrentProcessLock(lockPath, { starttime: FAKE_STARTTIME });
await expectCurrentPidOwnsLock({ sessionFile, timeoutMs: 500 });
});
});
it("does not reclaim active in-process lock files without starttime", async () => {
await expectActiveInProcessLockIsNotReclaimed();
});
@@ -464,6 +509,10 @@ describe("acquireSessionWriteLock", () => {
await expectActiveInProcessLockIsNotReclaimed({ legacyStarttime: 123.5 });
});
it("does not reclaim active in-process lock files with matching starttime", async () => {
await expectActiveInProcessLockIsNotReclaimed({ legacyStarttime: FAKE_STARTTIME });
});
it("registers cleanup for SIGQUIT and SIGABRT", () => {
expect(__testing.cleanupSignals).toContain("SIGQUIT");
expect(__testing.cleanupSignals).toContain("SIGABRT");

View File

@@ -348,6 +348,18 @@ async function readLockPayload(lockPath: string): Promise<LockFilePayload | null
}
}
async function resolveNormalizedSessionFile(sessionFile: string): Promise<string> {
const resolvedSessionFile = path.resolve(sessionFile);
const sessionDir = path.dirname(resolvedSessionFile);
let normalizedDir = sessionDir;
try {
normalizedDir = await fs.realpath(sessionDir);
} catch {
// Fall back to the resolved path if realpath fails (permissions, transient FS).
}
return path.join(normalizedDir, path.basename(resolvedSessionFile));
}
function inspectLockPayload(
payload: LockFilePayload | null,
staleMs: number,
@@ -429,16 +441,51 @@ async function shouldReclaimContendedLockFile(
function shouldTreatAsOrphanSelfLock(params: {
payload: LockFilePayload | null;
normalizedSessionFile: string;
reclaimWithoutStarttime?: boolean;
}): boolean {
const pid = isValidLockNumber(params.payload?.pid) ? params.payload.pid : null;
if (pid !== process.pid) {
return false;
}
const hasValidStarttime = isValidLockNumber(params.payload?.starttime);
if (hasValidStarttime) {
if (HELD_LOCKS.has(params.normalizedSessionFile)) {
return false;
}
return !HELD_LOCKS.has(params.normalizedSessionFile);
const storedStarttime = isValidLockNumber(params.payload?.starttime)
? params.payload.starttime
: null;
if (storedStarttime === null) {
return params.reclaimWithoutStarttime !== false;
}
const currentStarttime = getProcessStartTime(process.pid);
return currentStarttime !== null && currentStarttime === storedStarttime;
}
function inspectLockPayloadForSession(params: {
payload: LockFilePayload | null;
staleMs: number;
nowMs: number;
normalizedSessionFile: string;
reclaimWithoutStarttime?: boolean;
}): LockInspectionDetails {
const inspected = inspectLockPayload(params.payload, params.staleMs, params.nowMs);
if (
!shouldTreatAsOrphanSelfLock({
payload: params.payload,
normalizedSessionFile: params.normalizedSessionFile,
reclaimWithoutStarttime: params.reclaimWithoutStarttime,
})
) {
return inspected;
}
return {
...inspected,
stale: true,
staleReasons: inspected.staleReasons.includes("orphan-self-pid")
? inspected.staleReasons
: [...inspected.staleReasons, "orphan-self-pid"],
};
}
export async function cleanStaleLockFiles(params: {
@@ -476,7 +523,15 @@ export async function cleanStaleLockFiles(params: {
for (const entry of lockEntries) {
const lockPath = path.join(sessionsDir, entry.name);
const payload = await readLockPayload(lockPath);
const inspected = inspectLockPayload(payload, staleMs, nowMs);
const sessionFile = lockPath.slice(0, -".lock".length);
const normalizedSessionFile = await resolveNormalizedSessionFile(sessionFile);
const inspected = inspectLockPayloadForSession({
payload,
staleMs,
nowMs,
normalizedSessionFile,
reclaimWithoutStarttime: false,
});
const lockInfo: SessionLockInspection = {
lockPath,
...inspected,
@@ -515,13 +570,7 @@ export async function acquireSessionWriteLock(params: {
const sessionFile = path.resolve(params.sessionFile);
const sessionDir = path.dirname(sessionFile);
await fs.mkdir(sessionDir, { recursive: true });
let normalizedDir = sessionDir;
try {
normalizedDir = await fs.realpath(sessionDir);
} catch {
// Fall back to the resolved path if realpath fails (permissions, transient FS).
}
const normalizedSessionFile = path.join(normalizedDir, path.basename(sessionFile));
const normalizedSessionFile = await resolveNormalizedSessionFile(sessionFile);
const lockPath = `${normalizedSessionFile}.lock`;
const held = HELD_LOCKS.get(normalizedSessionFile);
@@ -587,21 +636,13 @@ export async function acquireSessionWriteLock(params: {
}
const payload = await readLockPayload(lockPath);
const nowMs = Date.now();
const inspected = inspectLockPayload(payload, staleMs, nowMs);
const orphanSelfLock = shouldTreatAsOrphanSelfLock({
const inspected = inspectLockPayloadForSession({
payload,
staleMs,
nowMs,
normalizedSessionFile,
});
const reclaimDetails = orphanSelfLock
? {
...inspected,
stale: true,
staleReasons: inspected.staleReasons.includes("orphan-self-pid")
? inspected.staleReasons
: [...inspected.staleReasons, "orphan-self-pid"],
}
: inspected;
if (await shouldReclaimContendedLockFile(lockPath, reclaimDetails, staleMs, nowMs)) {
if (await shouldReclaimContendedLockFile(lockPath, inspected, staleMs, nowMs)) {
await fs.rm(lockPath, { force: true });
continue;
}