diff --git a/CHANGELOG.md b/CHANGELOG.md index e00b46d2490..21fd9a9f23f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai - Providers/OpenAI: lock the auth picker wording for OpenAI API key, Codex browser login, and Codex device pairing so the setup choices no longer imply a mixed Codex/API-key auth path. (#67848) Thanks @tmlxrd. - Agents/BTW: route `/btw` side questions through provider stream registration with the session workspace, so Ollama provider URL construction and workspace-scoped hooks apply correctly. Fixes #68336. (#70413) Thanks @suboss87. +- Agents/sessions: make session transcript write locks non-reentrant by default, so same-process transcript writers contend unless a helper explicitly opts into nested lock ownership. - Memory search: use sqlite-vec KNN for vector recall while preserving full post-filter result limits in multi-model indexes. Fixes #69666. (#69680) Thanks @aalekh-sarvam. - Providers/OpenAI Codex: stop stale per-agent `openai-codex:default` OAuth profiles from shadowing a newer main-agent identity-scoped profile, and let `openclaw doctor` offer the matching cleanup. (#70393) Thanks @pashpashpash. - ACPX: route OpenClaw ACP bridge commands through the MCP-free runtime path even when the command is wrapped with `env`, has bridge flags, or is resumed from persisted session state, so documented `acpx openclaw` setups no longer fail on per-session MCP injection. (#68741) Thanks @alexlomt. diff --git a/docs/concepts/agent-loop.md b/docs/concepts/agent-loop.md index 5671016f52e..b4f0768c09c 100644 --- a/docs/concepts/agent-loop.md +++ b/docs/concepts/agent-loop.md @@ -2,6 +2,7 @@ summary: "Agent loop lifecycle, streams, and wait semantics" read_when: - You need an exact walkthrough of the agent loop or lifecycle events + - You are changing session queueing, transcript writes, or session write lock behavior title: "Agent Loop" --- @@ -48,13 +49,21 @@ wired end-to-end. - This prevents tool/session races and keeps session history consistent. - Messaging channels can choose queue modes (collect/steer/followup) that feed this lane system. See [Command Queue](/concepts/queue). +- Transcript writes are also protected by a session write lock on the session file. The lock is + process-aware and file-based, so it catches writers that bypass the in-process queue or come from + another process. +- Session write locks are non-reentrant by default. If a helper intentionally nests acquisition of + the same lock while preserving one logical writer, it must opt in explicitly with + `allowReentrant: true`. ## Session + workspace preparation - Workspace is resolved and created; sandboxed runs may redirect to a sandbox workspace root. - Skills are loaded (or reused from a snapshot) and injected into env and prompt. - Bootstrap/context files are resolved and injected into the system prompt report. -- A session write lock is acquired; `SessionManager` is opened and prepared before streaming. +- A session write lock is acquired; `SessionManager` is opened and prepared before streaming. Any + later transcript rewrite, compaction, or truncation path must take the same lock before opening or + mutating the transcript file. ## Prompt assembly + system prompt diff --git a/src/agents/session-write-lock.test.ts b/src/agents/session-write-lock.test.ts index f42c6b97034..637aa4dfdf6 100644 --- a/src/agents/session-write-lock.test.ts +++ b/src/agents/session-write-lock.test.ts @@ -125,8 +125,16 @@ describe("acquireSessionWriteLock", () => { const realLockPath = `${sessionReal}.lock`; const linkLockPath = `${sessionLink}.lock`; - const lockA = await acquireSessionWriteLock({ sessionFile: sessionReal, timeoutMs: 500 }); - const lockB = await acquireSessionWriteLock({ sessionFile: sessionLink, timeoutMs: 500 }); + const lockA = await acquireSessionWriteLock({ + sessionFile: sessionReal, + timeoutMs: 500, + allowReentrant: true, + }); + const lockB = await acquireSessionWriteLock({ + sessionFile: sessionLink, + timeoutMs: 500, + allowReentrant: true, + }); await expect(fs.access(realLockPath)).resolves.toBeUndefined(); await expect(fs.access(linkLockPath)).resolves.toBeUndefined(); @@ -147,8 +155,16 @@ describe("acquireSessionWriteLock", () => { it("keeps the lock file until the last release", async () => { await withTempSessionLockFile(async ({ sessionFile, lockPath }) => { - const lockA = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 }); - const lockB = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 }); + const lockA = await acquireSessionWriteLock({ + sessionFile, + timeoutMs: 500, + allowReentrant: true, + }); + const lockB = await acquireSessionWriteLock({ + sessionFile, + timeoutMs: 500, + allowReentrant: true, + }); await expectLockRemovedOnlyAfterFinalRelease({ lockPath, @@ -158,6 +174,55 @@ describe("acquireSessionWriteLock", () => { }); }); + it("does not reenter locks by default in the same process", async () => { + await withTempSessionLockFile(async ({ sessionFile }) => { + const lock = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 }); + await expect( + acquireSessionWriteLock({ sessionFile, timeoutMs: 5, staleMs: 60_000 }), + ).rejects.toThrow(/session file locked/); + await lock.release(); + }); + }); + + it("does not reenter locks by default through symlinked session paths", async () => { + if (process.platform === "win32") { + return; + } + + const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-")); + try { + const realDir = path.join(root, "real"); + const linkDir = path.join(root, "link"); + await fs.mkdir(realDir, { recursive: true }); + await fs.symlink(realDir, linkDir); + + const sessionReal = path.join(realDir, "sessions.json"); + const sessionLink = path.join(linkDir, "sessions.json"); + const lock = await acquireSessionWriteLock({ sessionFile: sessionReal, timeoutMs: 500 }); + + await expect( + acquireSessionWriteLock({ sessionFile: sessionLink, timeoutMs: 5, staleMs: 60_000 }), + ).rejects.toThrow(/session file locked/); + + await lock.release(); + } finally { + await fs.rm(root, { recursive: true, force: true }); + } + }); + + it("allows a new default lock acquisition after the held lock is released", async () => { + await withTempSessionLockFile(async ({ sessionFile }) => { + const lockA = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 }); + await expect( + acquireSessionWriteLock({ sessionFile, timeoutMs: 5, staleMs: 60_000 }), + ).rejects.toThrow(/session file locked/); + await lockA.release(); + + const lockB = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 }); + await lockB.release(); + }); + }); + it("reclaims stale lock files", async () => { const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-")); try { diff --git a/src/agents/session-write-lock.ts b/src/agents/session-write-lock.ts index 364cc7717cd..e0113c9676e 100644 --- a/src/agents/session-write-lock.ts +++ b/src/agents/session-write-lock.ts @@ -482,6 +482,7 @@ export async function acquireSessionWriteLock(params: { release: () => Promise; }> { registerCleanupHandlers(); + const allowReentrant = params.allowReentrant ?? false; const timeoutMs = resolvePositiveMs(params.timeoutMs, 10_000, { allowInfinity: true }); const staleMs = resolvePositiveMs(params.staleMs, DEFAULT_STALE_MS); const maxHoldMs = resolvePositiveMs(params.maxHoldMs, DEFAULT_MAX_HOLD_MS); @@ -497,7 +498,6 @@ export async function acquireSessionWriteLock(params: { const normalizedSessionFile = path.join(normalizedDir, path.basename(sessionFile)); const lockPath = `${normalizedSessionFile}.lock`; - const allowReentrant = params.allowReentrant ?? true; const held = HELD_LOCKS.get(normalizedSessionFile); if (allowReentrant && held) { held.count += 1;