fix: make session write locks non-reentrant by default

This commit is contained in:
Peter Steinberger
2026-04-23 04:57:25 +01:00
parent d878cf026c
commit 35ec4a9991
4 changed files with 81 additions and 6 deletions

View File

@@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai
- Providers/OpenAI: lock the auth picker wording for OpenAI API key, Codex browser login, and Codex device pairing so the setup choices no longer imply a mixed Codex/API-key auth path. (#67848) Thanks @tmlxrd.
- Agents/BTW: route `/btw` side questions through provider stream registration with the session workspace, so Ollama provider URL construction and workspace-scoped hooks apply correctly. Fixes #68336. (#70413) Thanks @suboss87.
- Agents/sessions: make session transcript write locks non-reentrant by default, so same-process transcript writers contend unless a helper explicitly opts into nested lock ownership.
- Memory search: use sqlite-vec KNN for vector recall while preserving full post-filter result limits in multi-model indexes. Fixes #69666. (#69680) Thanks @aalekh-sarvam.
- Providers/OpenAI Codex: stop stale per-agent `openai-codex:default` OAuth profiles from shadowing a newer main-agent identity-scoped profile, and let `openclaw doctor` offer the matching cleanup. (#70393) Thanks @pashpashpash.
- ACPX: route OpenClaw ACP bridge commands through the MCP-free runtime path even when the command is wrapped with `env`, has bridge flags, or is resumed from persisted session state, so documented `acpx openclaw` setups no longer fail on per-session MCP injection. (#68741) Thanks @alexlomt.

View File

@@ -2,6 +2,7 @@
summary: "Agent loop lifecycle, streams, and wait semantics"
read_when:
- You need an exact walkthrough of the agent loop or lifecycle events
- You are changing session queueing, transcript writes, or session write lock behavior
title: "Agent Loop"
---
@@ -48,13 +49,21 @@ wired end-to-end.
- This prevents tool/session races and keeps session history consistent.
- Messaging channels can choose queue modes (collect/steer/followup) that feed this lane system.
See [Command Queue](/concepts/queue).
- Transcript writes are also protected by a session write lock on the session file. The lock is
process-aware and file-based, so it catches writers that bypass the in-process queue or come from
another process.
- Session write locks are non-reentrant by default. If a helper intentionally nests acquisition of
the same lock while preserving one logical writer, it must opt in explicitly with
`allowReentrant: true`.
## Session + workspace preparation
- Workspace is resolved and created; sandboxed runs may redirect to a sandbox workspace root.
- Skills are loaded (or reused from a snapshot) and injected into env and prompt.
- Bootstrap/context files are resolved and injected into the system prompt report.
- A session write lock is acquired; `SessionManager` is opened and prepared before streaming.
- A session write lock is acquired; `SessionManager` is opened and prepared before streaming. Any
later transcript rewrite, compaction, or truncation path must take the same lock before opening or
mutating the transcript file.
## Prompt assembly + system prompt

View File

@@ -125,8 +125,16 @@ describe("acquireSessionWriteLock", () => {
const realLockPath = `${sessionReal}.lock`;
const linkLockPath = `${sessionLink}.lock`;
const lockA = await acquireSessionWriteLock({ sessionFile: sessionReal, timeoutMs: 500 });
const lockB = await acquireSessionWriteLock({ sessionFile: sessionLink, timeoutMs: 500 });
const lockA = await acquireSessionWriteLock({
sessionFile: sessionReal,
timeoutMs: 500,
allowReentrant: true,
});
const lockB = await acquireSessionWriteLock({
sessionFile: sessionLink,
timeoutMs: 500,
allowReentrant: true,
});
await expect(fs.access(realLockPath)).resolves.toBeUndefined();
await expect(fs.access(linkLockPath)).resolves.toBeUndefined();
@@ -147,8 +155,16 @@ describe("acquireSessionWriteLock", () => {
it("keeps the lock file until the last release", async () => {
await withTempSessionLockFile(async ({ sessionFile, lockPath }) => {
const lockA = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 });
const lockB = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 });
const lockA = await acquireSessionWriteLock({
sessionFile,
timeoutMs: 500,
allowReentrant: true,
});
const lockB = await acquireSessionWriteLock({
sessionFile,
timeoutMs: 500,
allowReentrant: true,
});
await expectLockRemovedOnlyAfterFinalRelease({
lockPath,
@@ -158,6 +174,55 @@ describe("acquireSessionWriteLock", () => {
});
});
it("does not reenter locks by default in the same process", async () => {
await withTempSessionLockFile(async ({ sessionFile }) => {
const lock = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 });
await expect(
acquireSessionWriteLock({ sessionFile, timeoutMs: 5, staleMs: 60_000 }),
).rejects.toThrow(/session file locked/);
await lock.release();
});
});
it("does not reenter locks by default through symlinked session paths", async () => {
if (process.platform === "win32") {
return;
}
const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-"));
try {
const realDir = path.join(root, "real");
const linkDir = path.join(root, "link");
await fs.mkdir(realDir, { recursive: true });
await fs.symlink(realDir, linkDir);
const sessionReal = path.join(realDir, "sessions.json");
const sessionLink = path.join(linkDir, "sessions.json");
const lock = await acquireSessionWriteLock({ sessionFile: sessionReal, timeoutMs: 500 });
await expect(
acquireSessionWriteLock({ sessionFile: sessionLink, timeoutMs: 5, staleMs: 60_000 }),
).rejects.toThrow(/session file locked/);
await lock.release();
} finally {
await fs.rm(root, { recursive: true, force: true });
}
});
it("allows a new default lock acquisition after the held lock is released", async () => {
await withTempSessionLockFile(async ({ sessionFile }) => {
const lockA = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 });
await expect(
acquireSessionWriteLock({ sessionFile, timeoutMs: 5, staleMs: 60_000 }),
).rejects.toThrow(/session file locked/);
await lockA.release();
const lockB = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 });
await lockB.release();
});
});
it("reclaims stale lock files", async () => {
const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-"));
try {

View File

@@ -482,6 +482,7 @@ export async function acquireSessionWriteLock(params: {
release: () => Promise<void>;
}> {
registerCleanupHandlers();
const allowReentrant = params.allowReentrant ?? false;
const timeoutMs = resolvePositiveMs(params.timeoutMs, 10_000, { allowInfinity: true });
const staleMs = resolvePositiveMs(params.staleMs, DEFAULT_STALE_MS);
const maxHoldMs = resolvePositiveMs(params.maxHoldMs, DEFAULT_MAX_HOLD_MS);
@@ -497,7 +498,6 @@ export async function acquireSessionWriteLock(params: {
const normalizedSessionFile = path.join(normalizedDir, path.basename(sessionFile));
const lockPath = `${normalizedSessionFile}.lock`;
const allowReentrant = params.allowReentrant ?? true;
const held = HELD_LOCKS.get(normalizedSessionFile);
if (allowReentrant && held) {
held.count += 1;