From a35333abe1641603fa148b5126e7afe8db642f09 Mon Sep 17 00:00:00 2001 From: Sean Coley Date: Sat, 25 Apr 2026 17:21:49 +1200 Subject: [PATCH] fix(browser): recover stale Chromium profile locks (#62935) (#62935) Co-authored-by: Peter Steinberger --- CHANGELOG.md | 1 + docs/tools/browser-linux-troubleshooting.md | 10 + .../src/browser/chrome.internal.test.ts | 60 +++++ extensions/browser/src/browser/chrome.test.ts | 50 ++++ extensions/browser/src/browser/chrome.ts | 234 ++++++++++++++---- 5 files changed, 300 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dcae15dc382..109dddddf82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -89,6 +89,7 @@ Docs: https://docs.openclaw.ai - Browser/security: require `operator.admin` for the `browser.request` gateway method, matching the host/browser-node control authority exposed by that route. Thanks @RichardCao. - Browser/profiles: allow local managed profiles to override `browser.executablePath`, so different profiles can launch different Chromium-based browsers. Thanks @nobrainer-tech. - Agents/replay: repair displaced or missing tool results before strict provider replay, use Codex-compatible `aborted` outputs for OpenAI Responses history, and drop partial aborted/error transport turns before retries. +- Browser/profiles: recover from stale Chromium `Singleton*` profile locks after crashes or host moves by clearing dead/foreign locks and retrying launch once. Thanks @seanc-dev. - Reply media: allow sandboxed replies to deliver OpenClaw-managed `media/outbound` and `media/tool-*` attachments without treating them as sandbox escapes, while keeping alias-escape checks on the managed media root. Fixes #71138. Thanks @mayor686, @truffle-dev, and @neeravmakwana. - CLI/agent: keep `openclaw agent --json` stdout reserved for the JSON response by routing gateway, plugin, and embedded-fallback diagnostics to stderr before execution starts. Fixes #71319. 
- Agents/Gemini: retry reasoning-only, empty, and planning-only Gemini turns instead of letting sessions silently stall. Fixes #71074. (#71362) Thanks @neeravmakwana. diff --git a/docs/tools/browser-linux-troubleshooting.md b/docs/tools/browser-linux-troubleshooting.md index b8df2b2f26f..9362d937bb7 100644 --- a/docs/tools/browser-linux-troubleshooting.md +++ b/docs/tools/browser-linux-troubleshooting.md @@ -25,6 +25,16 @@ chromium-browser is already the newest version (2:1snap1-0ubuntu2). This is NOT a real browser - it's just a wrapper. +Other common Linux launch failures: + +- `The profile appears to be in use by another Chromium process` means Chrome + found stale `Singleton*` lock files in the managed profile directory. OpenClaw + removes those locks and retries once when the lock points at a dead or + different-host process. +- `Missing X server or $DISPLAY` means OpenClaw is trying to launch a visible + browser on a host without a desktop session. Use `browser.headless: true`, + start `Xvfb`, or run OpenClaw in a real desktop session. 
+ ### Solution 1: Install Google Chrome (Recommended) Install the official Google Chrome `.deb` package, which is not sandboxed by snap: diff --git a/extensions/browser/src/browser/chrome.internal.test.ts b/extensions/browser/src/browser/chrome.internal.test.ts index 3b9d340fb24..aa6136addc7 100644 --- a/extensions/browser/src/browser/chrome.internal.test.ts +++ b/extensions/browser/src/browser/chrome.internal.test.ts @@ -433,6 +433,66 @@ describe("chrome.ts internal", () => { } }); + it("clears stale singleton locks and retries once after profile-in-use launch failure", async () => { + let cdpReachable = false; + vi.stubGlobal( + "fetch", + vi.fn(async () => { + if (!cdpReachable) { + throw new Error("ECONNREFUSED"); + } + return { + ok: true, + json: async () => ({ webSocketDebuggerUrl: "ws://127.0.0.1/devtools" }), + } as unknown as Response; + }), + ); + vi.spyOn(fs, "existsSync").mockImplementation((p) => { + const s = String(p); + if (s === "/tmp/profile-chrome" || s.endsWith("Local State") || s.endsWith("Preferences")) { + return true; + } + return false; + }); + + let spawnCalls = 0; + const firstProc = makeFakeProc(); + const secondProc = makeFakeProc(); + spawnMock.mockImplementation(() => { + spawnCalls += 1; + if (spawnCalls === 1) { + setTimeout(() => { + firstProc.stderr.emit( + "data", + Buffer.from("The profile appears to be in use by another Chromium process"), + ); + }, 0); + return firstProc; + } + cdpReachable = true; + return secondProc; + }); + + const profile = { ...makeProfile(18888), executablePath: "/tmp/profile-chrome" }; + const userDataDir = resolveOpenClawUserDataDir(profile.name); + await fsp.mkdir(userDataDir, { recursive: true }); + await fsp.writeFile(path.join(userDataDir, "SingletonCookie"), "cookie"); + await fsp.writeFile(path.join(userDataDir, "SingletonSocket"), "socket"); + await fsp.symlink("remote-host-535", path.join(userDataDir, "SingletonLock")); + + try { + const running = await launchOpenClawChrome(makeResolved(), 
profile); + expect(running.proc).toBe(secondProc); + expect(firstProc.kill).toHaveBeenCalledWith("SIGKILL"); + expect(spawnCalls).toBe(2); + expect(fs.existsSync(path.join(userDataDir, "SingletonLock"))).toBe(false); + expect(fs.existsSync(path.join(userDataDir, "SingletonSocket"))).toBe(false); + running.proc.kill?.("SIGTERM"); + } finally { + await fsp.rm(userDataDir, { recursive: true, force: true }); + } + }); + it("throws with stderr hint + sandbox hint when CDP never becomes reachable", async () => { const originalPlatform = process.platform; Object.defineProperty(process, "platform", { value: "linux" }); diff --git a/extensions/browser/src/browser/chrome.test.ts b/extensions/browser/src/browser/chrome.test.ts index 4d650e21064..c9b2cebed09 100644 --- a/extensions/browser/src/browser/chrome.test.ts +++ b/extensions/browser/src/browser/chrome.test.ts @@ -11,6 +11,7 @@ import { resolveGoogleChromeExecutableForPlatform, } from "./chrome.executables.js"; import { + clearStaleChromeSingletonLocks, decorateOpenClawProfile, diagnoseChromeCdp, ensureProfileCleanExit, @@ -212,6 +213,55 @@ describe("browser chrome profile decoration", () => { const profile = prefs.profile as Record; expect(profile.name).toBe(DEFAULT_OPENCLAW_BROWSER_PROFILE_NAME); }); + + it("clears stale singleton artifacts when the lock points at another host", async () => { + const userDataDir = await createUserDataDir(); + await fsp.writeFile(path.join(userDataDir, "SingletonCookie"), "cookie"); + await fsp.writeFile(path.join(userDataDir, "SingletonSocket"), "socket"); + await fsp.symlink("remote-host-535", path.join(userDataDir, "SingletonLock")); + + expect(clearStaleChromeSingletonLocks(userDataDir, "local-host")).toBe(true); + expect(fs.existsSync(path.join(userDataDir, "SingletonLock"))).toBe(false); + expect(fs.existsSync(path.join(userDataDir, "SingletonSocket"))).toBe(false); + expect(fs.existsSync(path.join(userDataDir, "SingletonCookie"))).toBe(false); + }); + + it("clears stale 
singleton artifacts when the lock PID is dead on the current host", async () => { + const userDataDir = await createUserDataDir(); + const deadPid = 2147483646; + await fsp.symlink(`${os.hostname()}-${deadPid}`, path.join(userDataDir, "SingletonLock")); + + expect(clearStaleChromeSingletonLocks(userDataDir, os.hostname())).toBe(true); + expect(fs.existsSync(path.join(userDataDir, "SingletonLock"))).toBe(false); + }); + + it("keeps singleton artifacts when the lock points at a current-host live process", async () => { + const userDataDir = await createUserDataDir(); + await fsp.symlink(`${os.hostname()}-${process.pid}`, path.join(userDataDir, "SingletonLock")); + + expect(clearStaleChromeSingletonLocks(userDataDir, os.hostname())).toBe(false); + expect(fs.lstatSync(path.join(userDataDir, "SingletonLock")).isSymbolicLink()).toBe(true); + }); + + it("keeps singleton artifacts when the lock PID exists but cannot be signaled", async () => { + const userDataDir = await createUserDataDir(); + await fsp.symlink(`${os.hostname()}-12345`, path.join(userDataDir, "SingletonLock")); + const err = new Error("operation not permitted") as NodeJS.ErrnoException; + err.code = "EPERM"; + const killSpy = vi.spyOn(process, "kill").mockImplementation(((pid, signal) => { + if (pid === 12345 && signal === 0) { + throw err; + } + return true; + }) as typeof process.kill); + + try { + expect(clearStaleChromeSingletonLocks(userDataDir, os.hostname())).toBe(false); + expect(fs.lstatSync(path.join(userDataDir, "SingletonLock")).isSymbolicLink()).toBe(true); + } finally { + killSpy.mockRestore(); + } + }); }); describe("browser chrome helpers", () => { diff --git a/extensions/browser/src/browser/chrome.ts b/extensions/browser/src/browser/chrome.ts index 95c046ebe7e..086f460fcb5 100644 --- a/extensions/browser/src/browser/chrome.ts +++ b/extensions/browser/src/browser/chrome.ts @@ -53,6 +53,13 @@ import { } from "./constants.js"; const log = createSubsystemLogger("browser").child("chrome"); 
+const CHROME_SINGLETON_LOCK_PATHS = [ + "SingletonLock", + "SingletonSocket", + "SingletonCookie", +] as const; +const CHROME_SINGLETON_IN_USE_PATTERN = /profile appears to be in use by another chromium process/i; +const CHROME_MISSING_DISPLAY_PATTERN = /missing x server|\$DISPLAY/i; export type { BrowserExecutable } from "./chrome.executables.js"; export { @@ -81,6 +88,109 @@ function exists(filePath: string) { } } +function processExists(pid: number): boolean { + if (!Number.isInteger(pid) || pid <= 0) { + return false; + } + try { + process.kill(pid, 0); + return true; + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "EPERM") { + return true; + } + return false; + } +} + +function clearChromeSingletonArtifacts(userDataDir: string) { + for (const basename of CHROME_SINGLETON_LOCK_PATHS) { + try { + fs.rmSync(path.join(userDataDir, basename), { force: true }); + } catch { + // ignore best-effort cleanup + } + } +} + +export function clearStaleChromeSingletonLocks( + userDataDir: string, + hostname = os.hostname(), +): boolean { + const lockPath = path.join(userDataDir, "SingletonLock"); + let target: string; + try { + target = fs.readlinkSync(lockPath); + } catch { + return false; + } + + const match = /^(?<lockHost>.+)-(?<pid>\d+)$/.exec(target); // symlink target is "<hostname>-<pid>" + if (!match?.groups) { + return false; + } + + const lockHost = normalizeOptionalString(match.groups.lockHost) ?? ""; + const pid = Number.parseInt(match.groups.pid ?? 
"", 10); + if (lockHost === hostname && processExists(pid)) { + return false; + } + + clearChromeSingletonArtifacts(userDataDir); + return true; +} + +async function waitForChromeProcessExit(proc: ChildProcess, timeoutMs: number): Promise<void> { + if (proc.exitCode != null || proc.signalCode != null) { // proc.killed only means a signal was sent, so it is not an exit check + return; + } + await new Promise<void>((resolve) => { + const timer = setTimeout(() => { + proc.off("exit", onExit); + proc.off("close", onExit); + resolve(); + }, timeoutMs); + const onExit = () => { + clearTimeout(timer); + resolve(); + }; + proc.once("exit", onExit); + proc.once("close", onExit); + }); +} + +async function terminateChromeForRetry(proc: ChildProcess, userDataDir: string) { + try { + proc.kill("SIGKILL"); + } catch { + // ignore + } + await waitForChromeProcessExit(proc, CHROME_BOOTSTRAP_EXIT_TIMEOUT_MS); + clearStaleChromeSingletonLocks(userDataDir); +} + +function chromeLaunchHints(params: { + stderrOutput: string; + resolved: ResolvedBrowserConfig; + profile: ResolvedBrowserProfile; +}): string { + const hints: string[] = []; + if (process.platform === "linux" && !params.resolved.noSandbox) { + hints.push("If running in a container or as root, try setting browser.noSandbox: true."); + } + if (CHROME_MISSING_DISPLAY_PATTERN.test(params.stderrOutput) && !params.profile.headless) { + hints.push( + "No DISPLAY/X server was detected. Enable browser.headless: true, start Xvfb, or run the Gateway in a desktop session.", + ); + } + if (CHROME_SINGLETON_IN_USE_PATTERN.test(params.stderrOutput)) { + hints.push( + `The Chromium profile "${params.profile.name}" is locked. Stop the existing browser or remove stale Singleton* lock files under ~/.openclaw/browser/${params.profile.name}/user-data.`, + ); + } + return hints.length > 0 ? 
`\nHint: ${hints.join("\nHint: ")}` : ""; +} + export type RunningChrome = { pid: number; exe: BrowserExecutable; @@ -363,66 +473,80 @@ export async function launchOpenClawChrome( log.warn(`openclaw browser clean-exit prefs failed: ${String(err)}`); } - const proc = spawnOnce(); + const launchOnceAndWait = async (allowSingletonRecovery: boolean): Promise<RunningChrome> => { // the retry passes false, so lock recovery runs at most once + const proc = spawnOnce(); - // Collect stderr for diagnostics in case Chrome fails to start. - // The listener is removed on success to avoid unbounded memory growth - // from a long-lived Chrome process that emits periodic warnings. - const stderrChunks: Buffer[] = []; - const onStderr = (chunk: Buffer) => { - stderrChunks.push(chunk); - }; - proc.stderr?.on("data", onStderr); + // Collect stderr for diagnostics in case Chrome fails to start. + // The listener is removed on success to avoid unbounded memory growth + // from a long-lived Chrome process that emits periodic warnings. + const stderrChunks: Buffer[] = []; + const onStderr = (chunk: Buffer) => { + stderrChunks.push(chunk); + }; + proc.stderr?.on("data", onStderr); - // Wait for CDP to come up. - const readyDeadline = Date.now() + CHROME_LAUNCH_READY_WINDOW_MS; - while (Date.now() < readyDeadline) { - if (await isChromeReachable(profile.cdpUrl)) { - break; - } - await new Promise((r) => setTimeout(r, CHROME_LAUNCH_READY_POLL_MS)); - } - - if (!(await isChromeReachable(profile.cdpUrl))) { - const diagnosticText = await diagnoseChromeCdp(profile.cdpUrl) - .then(formatChromeCdpDiagnostic) - .catch((err) => `CDP diagnostic failed: ${safeChromeCdpErrorMessage(err)}.`); - const stderrOutput = - normalizeOptionalString(Buffer.concat(stderrChunks).toString("utf8")) ?? ""; - const stderrHint = stderrOutput - ? `\nChrome stderr:\n${stderrOutput.slice(0, CHROME_STDERR_HINT_MAX_CHARS)}` - : ""; - const sandboxHint = - process.platform === "linux" && !resolved.noSandbox - ? 
"\nHint: If running in a container or as root, try setting browser.noSandbox: true in config." - : ""; try { - proc.kill("SIGKILL"); - } catch { - // ignore + const readyDeadline = Date.now() + CHROME_LAUNCH_READY_WINDOW_MS; + while (Date.now() < readyDeadline) { + if (await isChromeReachable(profile.cdpUrl)) { + break; + } + await new Promise((r) => setTimeout(r, CHROME_LAUNCH_READY_POLL_MS)); + } + + if (!(await isChromeReachable(profile.cdpUrl))) { + const diagnosticText = await diagnoseChromeCdp(profile.cdpUrl) + .then(formatChromeCdpDiagnostic) + .catch((err) => `CDP diagnostic failed: ${safeChromeCdpErrorMessage(err)}.`); + const stderrOutput = + normalizeOptionalString(Buffer.concat(stderrChunks).toString("utf8")) ?? ""; + if ( + allowSingletonRecovery && + CHROME_SINGLETON_IN_USE_PATTERN.test(stderrOutput) && + clearStaleChromeSingletonLocks(userDataDir) + ) { + log.warn( + `Removed stale Chromium Singleton* locks for profile "${profile.name}" and retrying launch.`, + ); + await terminateChromeForRetry(proc, userDataDir); + return await launchOnceAndWait(false); + } + const stderrHint = stderrOutput + ? `\nChrome stderr:\n${stderrOutput.slice(0, CHROME_STDERR_HINT_MAX_CHARS)}` + : ""; + const launchHints = chromeLaunchHints({ stderrOutput, resolved, profile }); + try { + proc.kill("SIGKILL"); + } catch { + // ignore + } + throw new Error( + `Failed to start Chrome CDP on port ${profile.cdpPort} for profile "${profile.name}". ${diagnosticText}${launchHints}${stderrHint}`, + ); + } + + const pid = proc.pid ?? -1; + log.info( + `🦞 openclaw browser started (${exe.kind}) profile "${profile.name}" on 127.0.0.1:${profile.cdpPort} (pid ${pid})`, + ); + + return { + pid, + exe, + userDataDir, + cdpPort: profile.cdpPort, + startedAt, + proc, + }; + } finally { + // Chrome started successfully or launch failed — detach the stderr listener + // and release the buffer. 
+ proc.stderr?.off("data", onStderr); + stderrChunks.length = 0; } - throw new Error( - `Failed to start Chrome CDP on port ${profile.cdpPort} for profile "${profile.name}". ${diagnosticText}${sandboxHint}${stderrHint}`, - ); - } - - // Chrome started successfully — detach the stderr listener and release the buffer. - proc.stderr?.off("data", onStderr); - stderrChunks.length = 0; - - const pid = proc.pid ?? -1; - log.info( - `🦞 openclaw browser started (${exe.kind}) profile "${profile.name}" on 127.0.0.1:${profile.cdpPort} (pid ${pid})`, - ); - - return { - pid, - exe, - userDataDir, - cdpPort: profile.cdpPort, - startedAt, - proc, }; + + return await launchOnceAndWait(true); } export async function stopOpenClawChrome(