fix(agents): release embedded-attempt session lock on every exit path (#86427)

* fix(agents): release embedded-attempt session lock on every exit path

The embedded run controller acquires its session write lock eagerly at
creation and released it only inside the post-run cleanup block. An
exception thrown in post-prompt processing skipped that block, so the lock
leaked to the live gateway process until the watchdog reclaimed it and
later requests to the session failed with SessionWriteLockTimeoutError.

Add an idempotent dispose() to the lock controller and call it from the
run's outer finally so the eagerly-held lock is released on every exit
path. Normal/aborted/timed-out runs still hand the lock to
acquireForCleanup first, so dispose() is a no-op then (no double release).

Fixes #86014

* fix: keep session lock teardown comment lean

* docs(changelog): note embedded session lock fix

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
Chunyue Wang
2026-05-25 22:03:23 +08:00
committed by GitHub
parent b077c3a813
commit 32ddfc22f5
4 changed files with 59 additions and 0 deletions

View File

@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
- Discord/OpenAI voice: accept longer leading wake-name mistranscripts such as "Open Club" for OpenClaw.
- Discord/OpenAI voice: accept leading fuzzy wake-name transcripts such as "Monty" or "Moti" for a Molty agent while keeping ambient speech gated.
- Media understanding: convert HEIC and HEIF images to JPEG before image description providers run so iPhone photos work in direct and configured image-description flows. (#86037)
- Agents: release embedded-attempt session locks from outer teardown so post-prompt exceptions cannot wedge later requests behind `SessionWriteLockTimeoutError`. Fixes #86014. Thanks @openperf.
- Discord/OpenAI voice: rotate Realtime sessions at provider max duration without logging the expected session-expiry event as an error.
- Agents/media: derive bundled plugin local-media trust from plugin tool metadata instead of importing the full plugin registry on subscription paths. (#84409) Thanks @samzong.
- Memory/local embeddings: run local GGUF embeddings in an isolated worker sidecar and degrade to configured fallback or keyword search on worker failure so native embedding crashes do not take down the Gateway. (#85348) Thanks @osolmaz.

View File

@@ -68,6 +68,45 @@ describe("embedded attempt session lock lifecycle", () => {
expect(releases).toEqual(["prep", "cleanup"]);
});
it("releases the eagerly-held attempt lock on dispose when cleanup is skipped (#86014)", async () => {
const releases: string[] = [];
const acquireSessionWriteLock = vi
.fn()
.mockResolvedValueOnce({ release: vi.fn(async () => releases.push("held")) });
const controller = await createEmbeddedAttemptSessionLockController({
acquireSessionWriteLock,
lockOptions,
});
// An exception on the post-prompt path skips acquireForCleanup; the run's outer finally
// must still release the eagerly-held lock or it leaks to the live process.
await controller.dispose();
await controller.dispose(); // idempotent
expect(acquireSessionWriteLock).toHaveBeenCalledTimes(1);
expect(releases).toEqual(["held"]);
});
it("dispose does not double-release a lock already handed to cleanup", async () => {
const releases: string[] = [];
const acquireSessionWriteLock = vi
.fn()
.mockResolvedValueOnce({ release: vi.fn(async () => releases.push("held")) });
const controller = await createEmbeddedAttemptSessionLockController({
acquireSessionWriteLock,
lockOptions,
});
const cleanupLock = await controller.acquireForCleanup();
await cleanupLock.release();
await controller.dispose();
expect(acquireSessionWriteLock).toHaveBeenCalledTimes(1);
expect(releases).toEqual(["held"]);
});
it("runs post-prompt transcript writes under a short reacquired lock", async () => {
const events: string[] = [];
const acquireSessionWriteLock = vi

View File

@@ -630,6 +630,7 @@ export type EmbeddedAttemptSessionLockController = {
): Promise<T>;
acquireForCleanup(params?: { session?: unknown }): Promise<SessionLock>;
hasSessionTakeover(): boolean;
dispose(): Promise<void>;
};
export async function createEmbeddedAttemptSessionLockController(params: {
@@ -872,6 +873,14 @@ export async function createEmbeddedAttemptSessionLockController(params: {
hasSessionTakeover(): boolean {
return takeoverDetected;
},
async dispose(): Promise<void> {
if (!heldLock) {
return;
}
const lock = heldLock;
heldLock = undefined;
await lock.release();
},
};
}

View File

@@ -1302,6 +1302,8 @@ export async function runEmbeddedAttempt(
| undefined;
let beforeAgentRunBlocked = false;
let beforeAgentRunBlockedBy: string | undefined;
// Releases the eager session lock if post-prompt code exits before cleanup.
let releaseRetainedSessionLock: (() => Promise<void>) | undefined;
try {
const skillsSnapshotForRun =
sandbox?.enabled && sandbox.workspaceAccess !== "rw" ? undefined : params.skillsSnapshot;
@@ -2140,6 +2142,7 @@ export async function runEmbeddedAttempt(
...sessionWriteLockOptions,
},
});
releaseRetainedSessionLock = () => sessionLockController.dispose();
let sessionManager: ReturnType<typeof guardSessionManager> | undefined;
let session: Awaited<ReturnType<typeof createAgentSession>>["session"] | undefined;
@@ -5070,6 +5073,13 @@ export async function runEmbeddedAttempt(
}
}
} finally {
try {
await releaseRetainedSessionLock?.();
} catch (releaseErr) {
log.error(
`failed to release retained session lock on attempt teardown: runId=${params.runId} ${String(releaseErr)}`,
);
}
emitDiagnosticRunCompleted?.(
aborted ? "aborted" : "error",
promptError ?? new Error("run exited before diagnostic completion"),