fix(agents): reclaim untracked self-owned session locks (#75822)

Summary:
- The PR refactors session-lock inspection to reclaim untracked current-process locks with matching starttime during acquisition and startup cleanup, adds regression tests, and adds a changelog entry.
- Reproducibility: yes. A high-confidence code-level reproduction is to create a fresh `.jsonl.lock` with `pid ... eLock or cleanStaleLockFiles on current main and observe that acquisition waits or cleanup leaves the lock.

ClawSweeper fixups:
- Included follow-up commit: docs: add session lock changelog entry
- Included follow-up commit: refactor(agents): distill session lock reclaim policy

Validation:
- ClawSweeper review passed for head 2eae2c93b1.
- Required merge gates passed before the squash merge.

Prepared head SHA: 2eae2c93b1
Review: https://github.com/openclaw/openclaw/pull/75822#issuecomment-4361741599

Co-authored-by: Cedric <86914379+cdznho@users.noreply.github.com>
Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
Cedric
2026-05-02 04:07:19 +01:00
committed by GitHub
parent 82a8006f77
commit 2f2bb7dac6
3 changed files with 114 additions and 23 deletions

View File

@@ -54,6 +54,7 @@ Docs: https://docs.openclaw.ai
- Slack/setup: print the generated app manifest as plain JSON instead of embedding it inside the framed setup note, so it can be copied into Slack without deleting border characters. Fixes #65751. Thanks @theDanielJLewis.
- Channels/WhatsApp: route CLI logout through the live Gateway and stop runtime-backed listeners before channel removal, so removing a WhatsApp account does not leave the old socket replying until restart. Fixes #67746. Thanks @123Mismail.
- Voice Call/Twilio: honor TTS directive text and provider voice/model overrides during telephony synthesis, so `[[tts:...]]` tags are not spoken literally and voiceId overrides reach OpenAI/ElevenLabs calls. Fixes #58114. Thanks @legonhilltech-jpg.
- Agents/session-locks: reclaim untracked current-process session locks with matching starttime during acquisition and startup cleanup, so Gateway restarts recover from self-owned orphan `.jsonl.lock` files. Fixes #75805; refs #49603. Thanks @cdznho.
- Agents/subagents: initialize built-in context engines before native `sessions_spawn` resolves spawn preparation, so cliBackend-only cold starts no longer fail with an unregistered `legacy` context engine. Fixes #73095. (#73904) Thanks @brokemac79.
- Agents/Codex: stop prompting message-tool-only source turns to finish with `NO_REPLY`, so quiet turns are represented by not calling the visible message tool instead of conflicting final-text instructions. Thanks @pashpashpash.
- Gateway/config: report failed backup restores as failed in logs and config observe audit records instead of marking them valid. (#70515) Thanks @davidangularme.

View File

@@ -401,6 +401,43 @@ describe("acquireSessionWriteLock", () => {
}
});
it("cleans untracked current-process .jsonl lock files with matching starttime", async () => {
const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-"));
const sessionsDir = path.join(root, "sessions");
await fs.mkdir(sessionsDir, { recursive: true });
const nowMs = Date.now();
const orphanSelfLock = path.join(sessionsDir, "orphan-self.jsonl.lock");
try {
await fs.writeFile(
orphanSelfLock,
JSON.stringify({
pid: process.pid,
createdAt: new Date(nowMs).toISOString(),
starttime: FAKE_STARTTIME,
}),
"utf8",
);
const result = await cleanStaleLockFiles({
sessionsDir,
staleMs: 30_000,
nowMs,
removeStale: true,
});
expect(result.locks).toHaveLength(1);
expect(result.cleaned.map((entry) => path.basename(entry.lockPath))).toEqual([
"orphan-self.jsonl.lock",
]);
expect(result.cleaned[0]?.staleReasons).toContain("orphan-self-pid");
await expect(fs.access(orphanSelfLock)).rejects.toThrow();
} finally {
await fs.rm(root, { recursive: true, force: true });
}
});
it("removes held locks on termination signals", async () => {
const signals = ["SIGINT", "SIGTERM", "SIGQUIT", "SIGABRT"] as const;
const originalKill = process.kill.bind(process);
@@ -456,6 +493,14 @@ describe("acquireSessionWriteLock", () => {
});
});
it("reclaims untracked current-process lock files with matching starttime", async () => {
await withTempSessionLockFile(async ({ sessionFile, lockPath }) => {
await writeCurrentProcessLock(lockPath, { starttime: FAKE_STARTTIME });
await expectCurrentPidOwnsLock({ sessionFile, timeoutMs: 500 });
});
});
it("does not reclaim active in-process lock files without starttime", async () => {
await expectActiveInProcessLockIsNotReclaimed();
});
@@ -464,6 +509,10 @@ describe("acquireSessionWriteLock", () => {
await expectActiveInProcessLockIsNotReclaimed({ legacyStarttime: 123.5 });
});
it("does not reclaim active in-process lock files with matching starttime", async () => {
await expectActiveInProcessLockIsNotReclaimed({ legacyStarttime: FAKE_STARTTIME });
});
it("registers cleanup for SIGQUIT and SIGABRT", () => {
expect(__testing.cleanupSignals).toContain("SIGQUIT");
expect(__testing.cleanupSignals).toContain("SIGABRT");

View File

@@ -348,6 +348,17 @@ async function readLockPayload(lockPath: string): Promise<LockFilePayload | null
}
}
async function resolveNormalizedSessionFile(sessionFile: string): Promise<string> {
const resolvedSessionFile = path.resolve(sessionFile);
const sessionDir = path.dirname(resolvedSessionFile);
try {
const normalizedDir = await fs.realpath(sessionDir);
return path.join(normalizedDir, path.basename(resolvedSessionFile));
} catch {
return resolvedSessionFile;
}
}
function inspectLockPayload(
payload: LockFilePayload | null,
staleMs: number,
@@ -429,16 +440,51 @@ async function shouldReclaimContendedLockFile(
function shouldTreatAsOrphanSelfLock(params: {
payload: LockFilePayload | null;
normalizedSessionFile: string;
reclaimLockWithoutStarttime: boolean;
}): boolean {
const pid = isValidLockNumber(params.payload?.pid) ? params.payload.pid : null;
if (pid !== process.pid) {
return false;
}
const hasValidStarttime = isValidLockNumber(params.payload?.starttime);
if (hasValidStarttime) {
if (HELD_LOCKS.has(params.normalizedSessionFile)) {
return false;
}
return !HELD_LOCKS.has(params.normalizedSessionFile);
const storedStarttime = isValidLockNumber(params.payload?.starttime)
? params.payload.starttime
: null;
if (storedStarttime === null) {
return params.reclaimLockWithoutStarttime;
}
const currentStarttime = getProcessStartTime(process.pid);
return currentStarttime !== null && currentStarttime === storedStarttime;
}
function inspectLockPayloadForSession(params: {
payload: LockFilePayload | null;
staleMs: number;
nowMs: number;
normalizedSessionFile: string;
reclaimLockWithoutStarttime: boolean;
}): LockInspectionDetails {
const inspected = inspectLockPayload(params.payload, params.staleMs, params.nowMs);
if (
!shouldTreatAsOrphanSelfLock({
payload: params.payload,
normalizedSessionFile: params.normalizedSessionFile,
reclaimLockWithoutStarttime: params.reclaimLockWithoutStarttime,
})
) {
return inspected;
}
return {
...inspected,
stale: true,
staleReasons: inspected.staleReasons.includes("orphan-self-pid")
? inspected.staleReasons
: [...inspected.staleReasons, "orphan-self-pid"],
};
}
export async function cleanStaleLockFiles(params: {
@@ -476,7 +522,15 @@ export async function cleanStaleLockFiles(params: {
for (const entry of lockEntries) {
const lockPath = path.join(sessionsDir, entry.name);
const payload = await readLockPayload(lockPath);
const inspected = inspectLockPayload(payload, staleMs, nowMs);
const sessionFile = lockPath.slice(0, -".lock".length);
const normalizedSessionFile = await resolveNormalizedSessionFile(sessionFile);
const inspected = inspectLockPayloadForSession({
payload,
staleMs,
nowMs,
normalizedSessionFile,
reclaimLockWithoutStarttime: false,
});
const lockInfo: SessionLockInspection = {
lockPath,
...inspected,
@@ -515,13 +569,7 @@ export async function acquireSessionWriteLock(params: {
const sessionFile = path.resolve(params.sessionFile);
const sessionDir = path.dirname(sessionFile);
await fs.mkdir(sessionDir, { recursive: true });
let normalizedDir = sessionDir;
try {
normalizedDir = await fs.realpath(sessionDir);
} catch {
// Fall back to the resolved path if realpath fails (permissions, transient FS).
}
const normalizedSessionFile = path.join(normalizedDir, path.basename(sessionFile));
const normalizedSessionFile = await resolveNormalizedSessionFile(sessionFile);
const lockPath = `${normalizedSessionFile}.lock`;
const held = HELD_LOCKS.get(normalizedSessionFile);
@@ -587,21 +635,14 @@ export async function acquireSessionWriteLock(params: {
}
const payload = await readLockPayload(lockPath);
const nowMs = Date.now();
const inspected = inspectLockPayload(payload, staleMs, nowMs);
const orphanSelfLock = shouldTreatAsOrphanSelfLock({
const inspected = inspectLockPayloadForSession({
payload,
staleMs,
nowMs,
normalizedSessionFile,
reclaimLockWithoutStarttime: true,
});
const reclaimDetails = orphanSelfLock
? {
...inspected,
stale: true,
staleReasons: inspected.staleReasons.includes("orphan-self-pid")
? inspected.staleReasons
: [...inspected.staleReasons, "orphan-self-pid"],
}
: inspected;
if (await shouldReclaimContendedLockFile(lockPath, reclaimDetails, staleMs, nowMs)) {
if (await shouldReclaimContendedLockFile(lockPath, inspected, staleMs, nowMs)) {
await fs.rm(lockPath, { force: true });
continue;
}