mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-29 22:58:45 +00:00
fix(agents): release embedded-attempt session lock on every exit path (#86427)
* fix(agents): release embedded-attempt session lock on every exit path The embedded run controller acquires its session write lock eagerly at creation and released it only inside the post-run cleanup block. An exception thrown in post-prompt processing skipped that block, so the lock leaked to the live gateway process until the watchdog reclaimed it and later requests to the session failed with SessionWriteLockTimeoutError. Add an idempotent dispose() to the lock controller and call it from the run's outer finally so the eagerly-held lock is released on every exit path. Normal/aborted/timed-out runs still hand the lock to acquireForCleanup first, so dispose() is a no-op then (no double release). Fixes #86014 * fix: keep session lock teardown comment lean * docs(changelog): note embedded session lock fix --------- Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Discord/OpenAI voice: accept longer leading wake-name mistranscripts such as "Open Club" for OpenClaw.
|
||||
- Discord/OpenAI voice: accept leading fuzzy wake-name transcripts such as "Monty" or "Moti" for a Molty agent while keeping ambient speech gated.
|
||||
- Media understanding: convert HEIC and HEIF images to JPEG before image description providers run so iPhone photos work in direct and configured image-description flows. (#86037)
|
||||
- Agents: release embedded-attempt session locks from outer teardown so post-prompt exceptions cannot wedge later requests behind `SessionWriteLockTimeoutError`. Fixes #86014. Thanks @openperf.
|
||||
- Discord/OpenAI voice: rotate Realtime sessions at provider max duration without logging the expected session-expiry event as an error.
|
||||
- Agents/media: derive bundled plugin local-media trust from plugin tool metadata instead of importing the full plugin registry on subscription paths. (#84409) Thanks @samzong.
|
||||
- Memory/local embeddings: run local GGUF embeddings in an isolated worker sidecar and degrade to configured fallback or keyword search on worker failure so native embedding crashes do not take down the Gateway. (#85348) Thanks @osolmaz.
|
||||
|
||||
@@ -68,6 +68,45 @@ describe("embedded attempt session lock lifecycle", () => {
|
||||
expect(releases).toEqual(["prep", "cleanup"]);
|
||||
});
|
||||
|
||||
it("releases the eagerly-held attempt lock on dispose when cleanup is skipped (#86014)", async () => {
|
||||
const releases: string[] = [];
|
||||
const acquireSessionWriteLock = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({ release: vi.fn(async () => releases.push("held")) });
|
||||
|
||||
const controller = await createEmbeddedAttemptSessionLockController({
|
||||
acquireSessionWriteLock,
|
||||
lockOptions,
|
||||
});
|
||||
|
||||
// An exception on the post-prompt path skips acquireForCleanup; the run's outer finally
|
||||
// must still release the eagerly-held lock or it leaks to the live process.
|
||||
await controller.dispose();
|
||||
await controller.dispose(); // idempotent
|
||||
|
||||
expect(acquireSessionWriteLock).toHaveBeenCalledTimes(1);
|
||||
expect(releases).toEqual(["held"]);
|
||||
});
|
||||
|
||||
it("dispose does not double-release a lock already handed to cleanup", async () => {
|
||||
const releases: string[] = [];
|
||||
const acquireSessionWriteLock = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({ release: vi.fn(async () => releases.push("held")) });
|
||||
|
||||
const controller = await createEmbeddedAttemptSessionLockController({
|
||||
acquireSessionWriteLock,
|
||||
lockOptions,
|
||||
});
|
||||
|
||||
const cleanupLock = await controller.acquireForCleanup();
|
||||
await cleanupLock.release();
|
||||
await controller.dispose();
|
||||
|
||||
expect(acquireSessionWriteLock).toHaveBeenCalledTimes(1);
|
||||
expect(releases).toEqual(["held"]);
|
||||
});
|
||||
|
||||
it("runs post-prompt transcript writes under a short reacquired lock", async () => {
|
||||
const events: string[] = [];
|
||||
const acquireSessionWriteLock = vi
|
||||
|
||||
@@ -630,6 +630,7 @@ export type EmbeddedAttemptSessionLockController = {
|
||||
): Promise<T>;
|
||||
acquireForCleanup(params?: { session?: unknown }): Promise<SessionLock>;
|
||||
hasSessionTakeover(): boolean;
|
||||
dispose(): Promise<void>;
|
||||
};
|
||||
|
||||
export async function createEmbeddedAttemptSessionLockController(params: {
|
||||
@@ -872,6 +873,14 @@ export async function createEmbeddedAttemptSessionLockController(params: {
|
||||
hasSessionTakeover(): boolean {
|
||||
return takeoverDetected;
|
||||
},
|
||||
async dispose(): Promise<void> {
|
||||
if (!heldLock) {
|
||||
return;
|
||||
}
|
||||
const lock = heldLock;
|
||||
heldLock = undefined;
|
||||
await lock.release();
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1302,6 +1302,8 @@ export async function runEmbeddedAttempt(
|
||||
| undefined;
|
||||
let beforeAgentRunBlocked = false;
|
||||
let beforeAgentRunBlockedBy: string | undefined;
|
||||
// Releases the eager session lock if post-prompt code exits before cleanup.
|
||||
let releaseRetainedSessionLock: (() => Promise<void>) | undefined;
|
||||
try {
|
||||
const skillsSnapshotForRun =
|
||||
sandbox?.enabled && sandbox.workspaceAccess !== "rw" ? undefined : params.skillsSnapshot;
|
||||
@@ -2140,6 +2142,7 @@ export async function runEmbeddedAttempt(
|
||||
...sessionWriteLockOptions,
|
||||
},
|
||||
});
|
||||
releaseRetainedSessionLock = () => sessionLockController.dispose();
|
||||
|
||||
let sessionManager: ReturnType<typeof guardSessionManager> | undefined;
|
||||
let session: Awaited<ReturnType<typeof createAgentSession>>["session"] | undefined;
|
||||
@@ -5070,6 +5073,13 @@ export async function runEmbeddedAttempt(
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
try {
|
||||
await releaseRetainedSessionLock?.();
|
||||
} catch (releaseErr) {
|
||||
log.error(
|
||||
`failed to release retained session lock on attempt teardown: runId=${params.runId} ${String(releaseErr)}`,
|
||||
);
|
||||
}
|
||||
emitDiagnosticRunCompleted?.(
|
||||
aborted ? "aborted" : "error",
|
||||
promptError ?? new Error("run exited before diagnostic completion"),
|
||||
|
||||
Reference in New Issue
Block a user