fix: prevent sandbox browser CDP startup hangs (#62873) (thanks @Syysean)

* refactor(sandbox): remove socat proxy and fix chromium keyring deadlock

* fix(sandbox): address review feedback by reinstating cdp isolation and stability flags

* fix(sandbox): increase entrypoint cdp timeout to 20s to honor autoStartTimeoutMs

* fix(sandbox): align implementation with PR description (keyring bypass, fail-fast, watchdog)

* fix

* fix(sandbox): remove bash CDP watchdog to eliminate dual-timeout race

* fix(sandbox): apply final fail-fast and lifecycle bindings

* fix(sandbox): restore noVNC and CDP port offset

* fix(sandbox): add max-time to curl to prevent HTTP hang

* fix(sandbox): align timeout with host and restore env flags

* fix(sandbox): pass auto-start timeout to container and restore wait -n

* fix(sandbox): update hash input type to include autoStartTimeoutMs

* fix(sandbox): implement production-grade lifecycle and timeout management

- Add strict integer validation for port and timeout environment variables
- Implement robust two-stage trap cleanup (SIGTERM with SIGKILL fallback) to prevent zombie processes
- Refactor CDP readiness probe to use absolute millisecond-precision deadlines
- Add early fail-fast detection if Chromium crashes during the startup phase
- Track all daemon PIDs explicitly for reliable teardown via wait -n

* fix(sandbox): allow renderer process limit to be 0 for chromium default

* fix(sandbox): add autoStartTimeoutMs to SandboxBrowserHashInput type

* test(sandbox): cover browser timeout cleanup

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
Sean
2026-04-10 12:56:38 +08:00
committed by GitHub
parent 4ad4ee1962
commit c61be87b0e
5 changed files with 180 additions and 54 deletions

View File

@@ -1,20 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail
set -Eeuo pipefail
dedupe_chrome_args() {
local -A seen_args=()
local -a unique_args=()
for arg in "${CHROME_ARGS[@]}"; do
if [[ -n "${seen_args["$arg"]:+x}" ]]; then
continue
fi
seen_args["$arg"]=1
unique_args+=("$arg")
done
CHROME_ARGS=("${unique_args[@]}")
}
export DBUS_SESSION_BUS_ADDRESS=/dev/null
export DISPLAY=:1
export HOME=/tmp/openclaw-home
@@ -29,21 +16,82 @@ ENABLE_NOVNC="${OPENCLAW_BROWSER_ENABLE_NOVNC:-1}"
HEADLESS="${OPENCLAW_BROWSER_HEADLESS:-0}"
ALLOW_NO_SANDBOX="${OPENCLAW_BROWSER_NO_SANDBOX:-0}"
NOVNC_PASSWORD="${OPENCLAW_BROWSER_NOVNC_PASSWORD:-}"
DISABLE_GRAPHICS_FLAGS="${OPENCLAW_BROWSER_DISABLE_GRAPHICS_FLAGS:-1}"
DISABLE_EXTENSIONS="${OPENCLAW_BROWSER_DISABLE_EXTENSIONS:-1}"
RENDERER_PROCESS_LIMIT="${OPENCLAW_BROWSER_RENDERER_PROCESS_LIMIT:-2}"
AUTO_START_TIMEOUT_MS="${OPENCLAW_BROWSER_AUTO_START_TIMEOUT_MS:-12000}"
validate_uint() {
local name="$1"
local value="$2"
local min="${3:-0}"
local max="${4:-4294967295}"
if ! [[ "$value" =~ ^[0-9]+$ ]]; then
echo "[sandbox] ERROR: $name must be an integer, got: ${value}" >&2
exit 1
fi
if (( value < min || value > max )); then
echo "[sandbox] ERROR: $name out of range (${min}..${max}), got: ${value}" >&2
exit 1
fi
}
validate_uint "CDP_PORT" "$CDP_PORT" 1 65535
validate_uint "VNC_PORT" "$VNC_PORT" 1 65535
validate_uint "NOVNC_PORT" "$NOVNC_PORT" 1 65535
validate_uint "AUTO_START_TIMEOUT_MS" "$AUTO_START_TIMEOUT_MS" 1 2147483647
if [[ -n "$RENDERER_PROCESS_LIMIT" ]]; then
validate_uint "RENDERER_PROCESS_LIMIT" "$RENDERER_PROCESS_LIMIT" 0 2147483647
fi
cleanup() {
local code="${1:-1}"
trap - EXIT INT TERM
local pids=()
local pid
for pid in "${WEBSOCKIFY_PID:-}" "${X11VNC_PID:-}" "${SOCAT_PID:-}" "${CHROME_PID:-}" "${XVFB_PID:-}"; do
if [[ -n "${pid:-}" ]]; then
pids+=("$pid")
fi
done
if ((${#pids[@]} > 0)); then
kill -TERM "${pids[@]}" 2>/dev/null || true
for _ in {1..10}; do
local alive=0
for pid in "${pids[@]}"; do
if kill -0 "$pid" 2>/dev/null; then
alive=1
break
fi
done
if [[ "$alive" == "0" ]]; then
break
fi
sleep 0.2
done
kill -KILL "${pids[@]}" 2>/dev/null || true
wait 2>/dev/null || true
fi
exit "$code"
}
trap 'cleanup "$?"' EXIT
trap 'cleanup 130' INT
trap 'cleanup 143' TERM
mkdir -p "${HOME}" "${HOME}/.chrome" "${XDG_CONFIG_HOME}" "${XDG_CACHE_HOME}"
Xvfb :1 -screen 0 1280x800x24 -ac -nolisten tcp &
if [[ "${HEADLESS}" == "1" ]]; then
CHROME_ARGS=(
"--headless=new"
)
else
CHROME_ARGS=()
fi
XVFB_PID=$!
echo "[sandbox] Xvfb started (PID: ${XVFB_PID})"
if [[ "${CDP_PORT}" -ge 65535 ]]; then
CHROME_CDP_PORT="$((CDP_PORT - 1))"
@@ -51,7 +99,7 @@ else
CHROME_CDP_PORT="$((CDP_PORT + 1))"
fi
CHROME_ARGS+=(
CHROME_ARGS=(
"--remote-debugging-address=127.0.0.1"
"--remote-debugging-port=${CHROME_CDP_PORT}"
"--user-data-dir=${HOME}/.chrome"
@@ -59,15 +107,24 @@ CHROME_ARGS+=(
"--no-default-browser-check"
"--disable-dev-shm-usage"
"--disable-background-networking"
"--disable-features=TranslateUI"
"--disable-breakpad"
"--disable-crash-reporter"
"--no-zygote"
"--metrics-recording-only"
"--password-store=basic"
"--use-mock-keychain"
)
if [[ "${HEADLESS}" == "1" ]]; then
CHROME_ARGS+=("--headless=new")
fi
if [[ "${ALLOW_NO_SANDBOX}" == "1" ]]; then
CHROME_ARGS+=("--no-sandbox" "--disable-setuid-sandbox")
fi
DISABLE_GRAPHICS_FLAGS_LOWER="${DISABLE_GRAPHICS_FLAGS,,}"
if [[ "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "1" || "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "true" || "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "yes" || "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "on" ]]; then
if [[ "${DISABLE_GRAPHICS_FLAGS_LOWER}" =~ ^(1|true|yes|on)$ ]]; then
CHROME_ARGS+=(
"--disable-3d-apis"
"--disable-gpu"
@@ -76,52 +133,75 @@ if [[ "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "1" || "${DISABLE_GRAPHICS_FLAGS_LOWE
fi
DISABLE_EXTENSIONS_LOWER="${DISABLE_EXTENSIONS,,}"
if [[ "${DISABLE_EXTENSIONS_LOWER}" == "1" || "${DISABLE_EXTENSIONS_LOWER}" == "true" || "${DISABLE_EXTENSIONS_LOWER}" == "yes" || "${DISABLE_EXTENSIONS_LOWER}" == "on" ]]; then
CHROME_ARGS+=(
"--disable-extensions"
)
if [[ "${DISABLE_EXTENSIONS_LOWER}" =~ ^(1|true|yes|on)$ ]]; then
CHROME_ARGS+=("--disable-extensions")
fi
if [[ "${RENDERER_PROCESS_LIMIT}" =~ ^[0-9]+$ && "${RENDERER_PROCESS_LIMIT}" -gt 0 ]]; then
CHROME_ARGS+=("--renderer-process-limit=${RENDERER_PROCESS_LIMIT}")
fi
if [[ "${ALLOW_NO_SANDBOX}" == "1" ]]; then
CHROME_ARGS+=(
"--no-sandbox"
"--disable-setuid-sandbox"
)
fi
dedupe_chrome_args
echo "[sandbox] Starting Chromium..."
chromium "${CHROME_ARGS[@]}" about:blank &
CHROME_PID=$!
echo "[sandbox] Chromium started (PID: ${CHROME_PID})"
for _ in $(seq 1 50); do
if curl -sS --max-time 1 "http://127.0.0.1:${CHROME_CDP_PORT}/json/version" >/dev/null; then
start_ms=$(date +%s%3N)
deadline_ms=$(( start_ms + AUTO_START_TIMEOUT_MS ))
CDP_READY=0
probe_url="http://127.0.0.1:${CHROME_CDP_PORT}/json/version"
echo "[sandbox] Waiting up to ${AUTO_START_TIMEOUT_MS}ms for CDP on port ${CHROME_CDP_PORT}..."
while (( $(date +%s%3N) < deadline_ms )); do
if ! kill -0 "${CHROME_PID}" 2>/dev/null; then
echo "[sandbox] ERROR: Chromium exited before CDP became ready."
exit 1
fi
if curl -fsS --max-time 0.5 "${probe_url}" >/dev/null; then
CDP_READY=1
break
fi
sleep 0.1
sleep 0.2
done
if [[ "${CDP_READY}" == "0" ]]; then
echo "[sandbox] ERROR: CDP failed to start within ${AUTO_START_TIMEOUT_MS}ms."
exit 1
fi
echo "[sandbox] CDP ready. Starting socat..."
SOCAT_LISTEN_ADDR="TCP-LISTEN:${CDP_PORT},fork,reuseaddr,bind=0.0.0.0"
if [[ -n "${CDP_SOURCE_RANGE}" ]]; then
SOCAT_LISTEN_ADDR="${SOCAT_LISTEN_ADDR},range=${CDP_SOURCE_RANGE}"
fi
socat "${SOCAT_LISTEN_ADDR}" "TCP:127.0.0.1:${CHROME_CDP_PORT}" &
SOCAT_PID=$!
echo "[sandbox] socat started (PID: ${SOCAT_PID})"
if [[ "${ENABLE_NOVNC}" == "1" && "${HEADLESS}" != "1" ]]; then
# VNC auth passwords are max 8 chars; use a random default when not provided.
if [[ -z "${NOVNC_PASSWORD}" ]]; then
NOVNC_PASSWORD="$(< /proc/sys/kernel/random/uuid)"
NOVNC_PASSWORD="${NOVNC_PASSWORD//-/}"
NOVNC_PASSWORD="${NOVNC_PASSWORD:0:8}"
fi
NOVNC_PASSWD_FILE="${HOME}/.vnc/passwd"
mkdir -p "${HOME}/.vnc"
x11vnc -storepasswd "${NOVNC_PASSWORD}" "${NOVNC_PASSWD_FILE}" >/dev/null
chmod 600 "${NOVNC_PASSWD_FILE}"
x11vnc -display :1 -rfbport "${VNC_PORT}" -shared -forever -rfbauth "${NOVNC_PASSWD_FILE}" -localhost &
x11vnc -storepasswd "${NOVNC_PASSWORD}" "${HOME}/.vnc/passwd" >/dev/null
chmod 600 "${HOME}/.vnc/passwd"
x11vnc -display :1 -rfbport "${VNC_PORT}" -shared -forever -rfbauth "${HOME}/.vnc/passwd" -localhost &
X11VNC_PID=$!
echo "[sandbox] x11vnc started (PID: ${X11VNC_PID})"
websockify --web /usr/share/novnc/ "${NOVNC_PORT}" "localhost:${VNC_PORT}" &
WEBSOCKIFY_PID=$!
echo "[sandbox] websockify started (PID: ${WEBSOCKIFY_PID})"
fi
echo "[sandbox] Container running. Monitoring all sub-processes..."
wait -n

View File

@@ -108,6 +108,7 @@ describe("ensureSandboxBrowser create args", () => {
});
beforeEach(() => {
vi.restoreAllMocks();
BROWSER_BRIDGES.clear();
resetNoVncObserverTokensForTests();
dockerMocks.dockerContainerState.mockClear();
@@ -239,4 +240,39 @@ describe("ensureSandboxBrowser create args", () => {
const labels = collectDockerFlagValues(createArgs ?? [], "--label");
expect(labels).toContain(`openclaw.mountFormatVersion=${SANDBOX_MOUNT_FORMAT_VERSION}`);
});
it("force-removes the browser container when CDP never becomes reachable", async () => {
vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("timeout"));
bridgeMocks.startBrowserBridgeServer.mockImplementationOnce(async (params) => {
await params.onEnsureAttachTarget?.({});
return {
server: {} as never,
port: 19000,
baseUrl: "http://127.0.0.1:19000",
state: {
server: null,
port: 19000,
resolved: { profiles: {} },
profiles: new Map(),
},
};
});
const cfg = buildConfig(false);
cfg.browser.autoStartTimeoutMs = 1;
await expect(
ensureSandboxBrowser({
scopeKey: "session:test",
workspaceDir: "/tmp/workspace",
agentWorkspaceDir: "/tmp/workspace",
cfg,
}),
).rejects.toThrow("hung container has been forcefully removed");
expect(dockerMocks.execDocker).toHaveBeenCalledWith(
["rm", "-f", expect.stringMatching(/^openclaw-sbx-browser-session-test-/)],
{ allowFailure: true },
);
});
});

View File

@@ -167,6 +167,7 @@ export async function ensureSandboxBrowser(params: {
noVncPort: params.cfg.browser.noVncPort,
headless: params.cfg.browser.headless,
enableNoVnc: params.cfg.browser.enableNoVnc,
autoStartTimeoutMs: params.cfg.browser.autoStartTimeoutMs,
cdpSourceRange,
},
securityEpoch: SANDBOX_BROWSER_SECURITY_HASH_EPOCH,
@@ -262,14 +263,15 @@ export async function ensureSandboxBrowser(params: {
args.push("-e", `OPENCLAW_BROWSER_HEADLESS=${params.cfg.browser.headless ? "1" : "0"}`);
args.push("-e", `OPENCLAW_BROWSER_ENABLE_NOVNC=${params.cfg.browser.enableNoVnc ? "1" : "0"}`);
args.push("-e", `OPENCLAW_BROWSER_CDP_PORT=${params.cfg.browser.cdpPort}`);
args.push(
"-e",
`OPENCLAW_BROWSER_AUTO_START_TIMEOUT_MS=${params.cfg.browser.autoStartTimeoutMs}`,
);
if (cdpSourceRange) {
args.push("-e", `${CDP_SOURCE_RANGE_ENV_KEY}=${cdpSourceRange}`);
}
args.push("-e", `OPENCLAW_BROWSER_VNC_PORT=${params.cfg.browser.vncPort}`);
args.push("-e", `OPENCLAW_BROWSER_NOVNC_PORT=${params.cfg.browser.noVncPort}`);
// Chromium's setuid/namespace sandbox cannot work inside Docker containers
// (PID namespace creation requires privileges Docker does not grant by default).
// The container itself provides isolation, so --no-sandbox is safe here.
args.push("-e", "OPENCLAW_BROWSER_NO_SANDBOX=1");
if (noVncEnabled && noVncPassword) {
args.push("-e", `${NOVNC_PASSWORD_ENV_KEY}=${noVncPassword}`);
@@ -302,9 +304,6 @@ export async function ensureSandboxBrowser(params: {
let desiredAuthToken = normalizeOptionalString(params.bridgeAuth?.token);
let desiredAuthPassword = normalizeOptionalString(params.bridgeAuth?.password);
if (!desiredAuthToken && !desiredAuthPassword) {
// Always require auth for the sandbox bridge server, even if gateway auth
// mode doesn't produce a shared secret (e.g. trusted-proxy).
// Keep it stable across calls by reusing the existing bridge auth.
desiredAuthToken = existing?.authToken;
desiredAuthPassword = existing?.authPassword;
if (!desiredAuthToken && !desiredAuthPassword) {
@@ -349,8 +348,9 @@ export async function ensureSandboxBrowser(params: {
timeoutMs: params.cfg.browser.autoStartTimeoutMs,
});
if (!ok) {
await execDocker(["rm", "-f", containerName], { allowFailure: true });
throw new Error(
`Sandbox browser CDP did not become reachable on 127.0.0.1:${mappedCdp} within ${params.cfg.browser.autoStartTimeoutMs}ms.`,
`Sandbox browser CDP did not become reachable on 127.0.0.1:${mappedCdp} within ${params.cfg.browser.autoStartTimeoutMs}ms. The hung container has been forcefully removed.`,
);
}
}

View File

@@ -118,6 +118,7 @@ describe("computeSandboxBrowserConfigHash", () => {
noVncPort: 6080,
headless: false,
enableNoVnc: true,
autoStartTimeoutMs: 12000,
},
securityEpoch: "epoch-v1",
workspaceAccess: "rw" as const,
@@ -150,6 +151,7 @@ describe("computeSandboxBrowserConfigHash", () => {
noVncPort: 6080,
headless: false,
enableNoVnc: true,
autoStartTimeoutMs: 12000,
},
workspaceAccess: "rw" as const,
workspaceDir: "/tmp/workspace",
@@ -176,6 +178,7 @@ describe("computeSandboxBrowserConfigHash", () => {
noVncPort: 6080,
headless: false,
enableNoVnc: true,
autoStartTimeoutMs: 12000,
},
securityEpoch: "epoch-v1",
workspaceAccess: "rw" as const,
@@ -204,6 +207,7 @@ describe("computeSandboxBrowserConfigHash", () => {
noVncPort: 6080,
headless: false,
enableNoVnc: true,
autoStartTimeoutMs: 12000,
},
securityEpoch: "epoch-v1",
workspaceAccess: "rw" as const,

View File

@@ -13,7 +13,13 @@ type SandboxBrowserHashInput = {
docker: SandboxDockerConfig;
browser: Pick<
SandboxBrowserConfig,
"cdpPort" | "cdpSourceRange" | "vncPort" | "noVncPort" | "headless" | "enableNoVnc"
| "cdpPort"
| "cdpSourceRange"
| "vncPort"
| "noVncPort"
| "headless"
| "enableNoVnc"
| "autoStartTimeoutMs"
>;
securityEpoch: string;
workspaceAccess: SandboxWorkspaceAccess;