From 32ddfc22f5e432ef245541960a67949cb9f8d4ea Mon Sep 17 00:00:00 2001 From: Chunyue Wang <80630709+openperf@users.noreply.github.com> Date: Mon, 25 May 2026 22:03:23 +0800 Subject: [PATCH] fix(agents): release embedded-attempt session lock on every exit path (#86427) * fix(agents): release embedded-attempt session lock on every exit path The embedded run controller acquires its session write lock eagerly at creation and released it only inside the post-run cleanup block. An exception thrown in post-prompt processing skipped that block, so the lock leaked to the live gateway process until the watchdog reclaimed it and later requests to the session failed with SessionWriteLockTimeoutError. Add an idempotent dispose() to the lock controller and call it from the run's outer finally so the eagerly-held lock is released on every exit path. Normal/aborted/timed-out runs still hand the lock to acquireForCleanup first, so dispose() is a no-op then (no double release). Fixes #86014 * fix: keep session lock teardown comment lean * docs(changelog): note embedded session lock fix --------- Co-authored-by: Peter Steinberger --- CHANGELOG.md | 1 + .../run/attempt.session-lock.test.ts | 39 +++++++++++++++++++ .../run/attempt.session-lock.ts | 9 +++++ src/agents/pi-embedded-runner/run/attempt.ts | 10 +++++ 4 files changed, 59 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f48895cf3f..af8711776e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai - Discord/OpenAI voice: accept longer leading wake-name mistranscripts such as "Open Club" for OpenClaw. - Discord/OpenAI voice: accept leading fuzzy wake-name transcripts such as "Monty" or "Moti" for a Molty agent while keeping ambient speech gated. - Media understanding: convert HEIC and HEIF images to JPEG before image description providers run so iPhone photos work in direct and configured image-description flows. (#86037) +- Agents: release embedded-attempt session locks from outer teardown so post-prompt exceptions cannot wedge later requests behind `SessionWriteLockTimeoutError`. Fixes #86014. Thanks @openperf. - Discord/OpenAI voice: rotate Realtime sessions at provider max duration without logging the expected session-expiry event as an error. - Agents/media: derive bundled plugin local-media trust from plugin tool metadata instead of importing the full plugin registry on subscription paths. (#84409) Thanks @samzong. - Memory/local embeddings: run local GGUF embeddings in an isolated worker sidecar and degrade to configured fallback or keyword search on worker failure so native embedding crashes do not take down the Gateway. (#85348) Thanks @osolmaz. diff --git a/src/agents/pi-embedded-runner/run/attempt.session-lock.test.ts b/src/agents/pi-embedded-runner/run/attempt.session-lock.test.ts index ccb18df90f2..7a81ca8a352 100644 --- a/src/agents/pi-embedded-runner/run/attempt.session-lock.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.session-lock.test.ts @@ -68,6 +68,45 @@ describe("embedded attempt session lock lifecycle", () => { expect(releases).toEqual(["prep", "cleanup"]); }); + it("releases the eagerly-held attempt lock on dispose when cleanup is skipped (#86014)", async () => { + const releases: string[] = []; + const acquireSessionWriteLock = vi + .fn() + .mockResolvedValueOnce({ release: vi.fn(async () => releases.push("held")) }); + + const controller = await createEmbeddedAttemptSessionLockController({ + acquireSessionWriteLock, + lockOptions, + }); + + // An exception on the post-prompt path skips acquireForCleanup; the run's outer finally + // must still release the eagerly-held lock or it leaks to the live process. + await controller.dispose(); + await controller.dispose(); // idempotent + + expect(acquireSessionWriteLock).toHaveBeenCalledTimes(1); + expect(releases).toEqual(["held"]); + }); + + it("dispose does not double-release a lock already handed to cleanup", async () => { + const releases: string[] = []; + const acquireSessionWriteLock = vi + .fn() + .mockResolvedValueOnce({ release: vi.fn(async () => releases.push("held")) }); + + const controller = await createEmbeddedAttemptSessionLockController({ + acquireSessionWriteLock, + lockOptions, + }); + + const cleanupLock = await controller.acquireForCleanup(); + await cleanupLock.release(); + await controller.dispose(); + + expect(acquireSessionWriteLock).toHaveBeenCalledTimes(1); + expect(releases).toEqual(["held"]); + }); + it("runs post-prompt transcript writes under a short reacquired lock", async () => { const events: string[] = []; const acquireSessionWriteLock = vi diff --git a/src/agents/pi-embedded-runner/run/attempt.session-lock.ts b/src/agents/pi-embedded-runner/run/attempt.session-lock.ts index dc94b5f90b2..d945a562a68 100644 --- a/src/agents/pi-embedded-runner/run/attempt.session-lock.ts +++ b/src/agents/pi-embedded-runner/run/attempt.session-lock.ts @@ -630,6 +630,7 @@ export type EmbeddedAttemptSessionLockController = { ): Promise; acquireForCleanup(params?: { session?: unknown }): Promise; hasSessionTakeover(): boolean; + dispose(): Promise; }; export async function createEmbeddedAttemptSessionLockController(params: { @@ -872,6 +873,14 @@ export async function createEmbeddedAttemptSessionLockController(params: { hasSessionTakeover(): boolean { return takeoverDetected; }, + async dispose(): Promise { + if (!heldLock) { + return; + } + const lock = heldLock; + heldLock = undefined; + await lock.release(); + }, }; } diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 9c566f341ca..bd44890c6b9 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1302,6 +1302,8 @@ export async function runEmbeddedAttempt( | undefined; let beforeAgentRunBlocked = false; let beforeAgentRunBlockedBy: string | undefined; + // Releases the eager session lock if post-prompt code exits before cleanup. + let releaseRetainedSessionLock: (() => Promise) | undefined; try { const skillsSnapshotForRun = sandbox?.enabled && sandbox.workspaceAccess !== "rw" ? undefined : params.skillsSnapshot; @@ -2140,6 +2142,7 @@ export async function runEmbeddedAttempt( ...sessionWriteLockOptions, }, }); + releaseRetainedSessionLock = () => sessionLockController.dispose(); let sessionManager: ReturnType | undefined; let session: Awaited>["session"] | undefined; @@ -5070,6 +5073,13 @@ export async function runEmbeddedAttempt( } } } finally { + try { + await releaseRetainedSessionLock?.(); + } catch (releaseErr) { + log.error( + `failed to release retained session lock on attempt teardown: runId=${params.runId} ${String(releaseErr)}`, + ); + } emitDiagnosticRunCompleted?.( aborted ? "aborted" : "error", promptError ?? new Error("run exited before diagnostic completion"),