mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 09:41:11 +00:00
fix: prevent sandbox browser CDP startup hangs (#62873) (thanks @Syysean)
* refactor(sandbox): remove socat proxy and fix chromium keyring deadlock * fix(sandbox): address review feedback by reinstating cdp isolation and stability flags * fix(sandbox): increase entrypoint cdp timeout to 20s to honor autoStartTimeoutMs * fix(sandbox): align implementation with PR description (keyring bypass, fail-fast, watchdog) * fix * fix(sandbox): remove bash CDP watchdog to eliminate dual-timeout race * fix(sandbox): apply final fail-fast and lifecycle bindings * fix(sandbox): restore noVNC and CDP port offset * fix(sandbox): add max-time to curl to prevent HTTP hang * fix(sandbox): align timeout with host and restore env flags * fix(sandbox): pass auto-start timeout to container and restore wait -n * fix(sandbox): update hash input type to include autoStartTimeoutMs * fix(sandbox): implement production-grade lifecycle and timeout management - Add strict integer validation for port and timeout environment variables - Implement robust two-stage trap cleanup (SIGTERM with SIGKILL fallback) to prevent zombie processes - Refactor CDP readiness probe to use absolute millisecond-precision deadlines - Add early fail-fast detection if Chromium crashes during the startup phase - Track all daemon PIDs explicitly for reliable teardown via wait -n * fix(sandbox): allow renderer process limit to be 0 for chromium default * fix(sandbox): add autoStartTimeoutMs to SandboxBrowserHashInput type * test(sandbox): cover browser timeout cleanup --------- Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
@@ -1,20 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
set -Eeuo pipefail
|
||||
|
||||
dedupe_chrome_args() {
|
||||
local -A seen_args=()
|
||||
local -a unique_args=()
|
||||
|
||||
for arg in "${CHROME_ARGS[@]}"; do
|
||||
if [[ -n "${seen_args["$arg"]:+x}" ]]; then
|
||||
continue
|
||||
fi
|
||||
seen_args["$arg"]=1
|
||||
unique_args+=("$arg")
|
||||
done
|
||||
|
||||
CHROME_ARGS=("${unique_args[@]}")
|
||||
}
|
||||
export DBUS_SESSION_BUS_ADDRESS=/dev/null
|
||||
|
||||
export DISPLAY=:1
|
||||
export HOME=/tmp/openclaw-home
|
||||
@@ -29,21 +16,82 @@ ENABLE_NOVNC="${OPENCLAW_BROWSER_ENABLE_NOVNC:-1}"
|
||||
HEADLESS="${OPENCLAW_BROWSER_HEADLESS:-0}"
|
||||
ALLOW_NO_SANDBOX="${OPENCLAW_BROWSER_NO_SANDBOX:-0}"
|
||||
NOVNC_PASSWORD="${OPENCLAW_BROWSER_NOVNC_PASSWORD:-}"
|
||||
|
||||
DISABLE_GRAPHICS_FLAGS="${OPENCLAW_BROWSER_DISABLE_GRAPHICS_FLAGS:-1}"
|
||||
DISABLE_EXTENSIONS="${OPENCLAW_BROWSER_DISABLE_EXTENSIONS:-1}"
|
||||
RENDERER_PROCESS_LIMIT="${OPENCLAW_BROWSER_RENDERER_PROCESS_LIMIT:-2}"
|
||||
AUTO_START_TIMEOUT_MS="${OPENCLAW_BROWSER_AUTO_START_TIMEOUT_MS:-12000}"
|
||||
|
||||
validate_uint() {
|
||||
local name="$1"
|
||||
local value="$2"
|
||||
local min="${3:-0}"
|
||||
local max="${4:-4294967295}"
|
||||
|
||||
if ! [[ "$value" =~ ^[0-9]+$ ]]; then
|
||||
echo "[sandbox] ERROR: $name must be an integer, got: ${value}" >&2
|
||||
exit 1
|
||||
fi
|
||||
if (( value < min || value > max )); then
|
||||
echo "[sandbox] ERROR: $name out of range (${min}..${max}), got: ${value}" >&2
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
validate_uint "CDP_PORT" "$CDP_PORT" 1 65535
|
||||
validate_uint "VNC_PORT" "$VNC_PORT" 1 65535
|
||||
validate_uint "NOVNC_PORT" "$NOVNC_PORT" 1 65535
|
||||
validate_uint "AUTO_START_TIMEOUT_MS" "$AUTO_START_TIMEOUT_MS" 1 2147483647
|
||||
if [[ -n "$RENDERER_PROCESS_LIMIT" ]]; then
|
||||
validate_uint "RENDERER_PROCESS_LIMIT" "$RENDERER_PROCESS_LIMIT" 0 2147483647
|
||||
fi
|
||||
|
||||
cleanup() {
|
||||
local code="${1:-1}"
|
||||
trap - EXIT INT TERM
|
||||
|
||||
local pids=()
|
||||
local pid
|
||||
|
||||
for pid in "${WEBSOCKIFY_PID:-}" "${X11VNC_PID:-}" "${SOCAT_PID:-}" "${CHROME_PID:-}" "${XVFB_PID:-}"; do
|
||||
if [[ -n "${pid:-}" ]]; then
|
||||
pids+=("$pid")
|
||||
fi
|
||||
done
|
||||
|
||||
if ((${#pids[@]} > 0)); then
|
||||
kill -TERM "${pids[@]}" 2>/dev/null || true
|
||||
|
||||
for _ in {1..10}; do
|
||||
local alive=0
|
||||
for pid in "${pids[@]}"; do
|
||||
if kill -0 "$pid" 2>/dev/null; then
|
||||
alive=1
|
||||
break
|
||||
fi
|
||||
done
|
||||
if [[ "$alive" == "0" ]]; then
|
||||
break
|
||||
fi
|
||||
sleep 0.2
|
||||
done
|
||||
|
||||
kill -KILL "${pids[@]}" 2>/dev/null || true
|
||||
wait 2>/dev/null || true
|
||||
fi
|
||||
|
||||
exit "$code"
|
||||
}
|
||||
|
||||
trap 'cleanup "$?"' EXIT
|
||||
trap 'cleanup 130' INT
|
||||
trap 'cleanup 143' TERM
|
||||
|
||||
mkdir -p "${HOME}" "${HOME}/.chrome" "${XDG_CONFIG_HOME}" "${XDG_CACHE_HOME}"
|
||||
|
||||
Xvfb :1 -screen 0 1280x800x24 -ac -nolisten tcp &
|
||||
|
||||
if [[ "${HEADLESS}" == "1" ]]; then
|
||||
CHROME_ARGS=(
|
||||
"--headless=new"
|
||||
)
|
||||
else
|
||||
CHROME_ARGS=()
|
||||
fi
|
||||
XVFB_PID=$!
|
||||
echo "[sandbox] Xvfb started (PID: ${XVFB_PID})"
|
||||
|
||||
if [[ "${CDP_PORT}" -ge 65535 ]]; then
|
||||
CHROME_CDP_PORT="$((CDP_PORT - 1))"
|
||||
@@ -51,7 +99,7 @@ else
|
||||
CHROME_CDP_PORT="$((CDP_PORT + 1))"
|
||||
fi
|
||||
|
||||
CHROME_ARGS+=(
|
||||
CHROME_ARGS=(
|
||||
"--remote-debugging-address=127.0.0.1"
|
||||
"--remote-debugging-port=${CHROME_CDP_PORT}"
|
||||
"--user-data-dir=${HOME}/.chrome"
|
||||
@@ -59,15 +107,24 @@ CHROME_ARGS+=(
|
||||
"--no-default-browser-check"
|
||||
"--disable-dev-shm-usage"
|
||||
"--disable-background-networking"
|
||||
"--disable-features=TranslateUI"
|
||||
"--disable-breakpad"
|
||||
"--disable-crash-reporter"
|
||||
"--no-zygote"
|
||||
"--metrics-recording-only"
|
||||
"--password-store=basic"
|
||||
"--use-mock-keychain"
|
||||
)
|
||||
|
||||
if [[ "${HEADLESS}" == "1" ]]; then
|
||||
CHROME_ARGS+=("--headless=new")
|
||||
fi
|
||||
|
||||
if [[ "${ALLOW_NO_SANDBOX}" == "1" ]]; then
|
||||
CHROME_ARGS+=("--no-sandbox" "--disable-setuid-sandbox")
|
||||
fi
|
||||
|
||||
DISABLE_GRAPHICS_FLAGS_LOWER="${DISABLE_GRAPHICS_FLAGS,,}"
|
||||
if [[ "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "1" || "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "true" || "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "yes" || "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "on" ]]; then
|
||||
if [[ "${DISABLE_GRAPHICS_FLAGS_LOWER}" =~ ^(1|true|yes|on)$ ]]; then
|
||||
CHROME_ARGS+=(
|
||||
"--disable-3d-apis"
|
||||
"--disable-gpu"
|
||||
@@ -76,52 +133,75 @@ if [[ "${DISABLE_GRAPHICS_FLAGS_LOWER}" == "1" || "${DISABLE_GRAPHICS_FLAGS_LOWE
|
||||
fi
|
||||
|
||||
DISABLE_EXTENSIONS_LOWER="${DISABLE_EXTENSIONS,,}"
|
||||
if [[ "${DISABLE_EXTENSIONS_LOWER}" == "1" || "${DISABLE_EXTENSIONS_LOWER}" == "true" || "${DISABLE_EXTENSIONS_LOWER}" == "yes" || "${DISABLE_EXTENSIONS_LOWER}" == "on" ]]; then
|
||||
CHROME_ARGS+=(
|
||||
"--disable-extensions"
|
||||
)
|
||||
if [[ "${DISABLE_EXTENSIONS_LOWER}" =~ ^(1|true|yes|on)$ ]]; then
|
||||
CHROME_ARGS+=("--disable-extensions")
|
||||
fi
|
||||
|
||||
if [[ "${RENDERER_PROCESS_LIMIT}" =~ ^[0-9]+$ && "${RENDERER_PROCESS_LIMIT}" -gt 0 ]]; then
|
||||
CHROME_ARGS+=("--renderer-process-limit=${RENDERER_PROCESS_LIMIT}")
|
||||
fi
|
||||
|
||||
if [[ "${ALLOW_NO_SANDBOX}" == "1" ]]; then
|
||||
CHROME_ARGS+=(
|
||||
"--no-sandbox"
|
||||
"--disable-setuid-sandbox"
|
||||
)
|
||||
fi
|
||||
|
||||
dedupe_chrome_args
|
||||
echo "[sandbox] Starting Chromium..."
|
||||
chromium "${CHROME_ARGS[@]}" about:blank &
|
||||
CHROME_PID=$!
|
||||
echo "[sandbox] Chromium started (PID: ${CHROME_PID})"
|
||||
|
||||
for _ in $(seq 1 50); do
|
||||
if curl -sS --max-time 1 "http://127.0.0.1:${CHROME_CDP_PORT}/json/version" >/dev/null; then
|
||||
start_ms=$(date +%s%3N)
|
||||
deadline_ms=$(( start_ms + AUTO_START_TIMEOUT_MS ))
|
||||
CDP_READY=0
|
||||
probe_url="http://127.0.0.1:${CHROME_CDP_PORT}/json/version"
|
||||
|
||||
echo "[sandbox] Waiting up to ${AUTO_START_TIMEOUT_MS}ms for CDP on port ${CHROME_CDP_PORT}..."
|
||||
|
||||
while (( $(date +%s%3N) < deadline_ms )); do
|
||||
if ! kill -0 "${CHROME_PID}" 2>/dev/null; then
|
||||
echo "[sandbox] ERROR: Chromium exited before CDP became ready."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if curl -fsS --max-time 0.5 "${probe_url}" >/dev/null; then
|
||||
CDP_READY=1
|
||||
break
|
||||
fi
|
||||
sleep 0.1
|
||||
|
||||
sleep 0.2
|
||||
done
|
||||
|
||||
if [[ "${CDP_READY}" == "0" ]]; then
|
||||
echo "[sandbox] ERROR: CDP failed to start within ${AUTO_START_TIMEOUT_MS}ms."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "[sandbox] CDP ready. Starting socat..."
|
||||
|
||||
SOCAT_LISTEN_ADDR="TCP-LISTEN:${CDP_PORT},fork,reuseaddr,bind=0.0.0.0"
|
||||
if [[ -n "${CDP_SOURCE_RANGE}" ]]; then
|
||||
SOCAT_LISTEN_ADDR="${SOCAT_LISTEN_ADDR},range=${CDP_SOURCE_RANGE}"
|
||||
fi
|
||||
|
||||
socat "${SOCAT_LISTEN_ADDR}" "TCP:127.0.0.1:${CHROME_CDP_PORT}" &
|
||||
SOCAT_PID=$!
|
||||
echo "[sandbox] socat started (PID: ${SOCAT_PID})"
|
||||
|
||||
if [[ "${ENABLE_NOVNC}" == "1" && "${HEADLESS}" != "1" ]]; then
|
||||
# VNC auth passwords are max 8 chars; use a random default when not provided.
|
||||
if [[ -z "${NOVNC_PASSWORD}" ]]; then
|
||||
NOVNC_PASSWORD="$(< /proc/sys/kernel/random/uuid)"
|
||||
NOVNC_PASSWORD="${NOVNC_PASSWORD//-/}"
|
||||
NOVNC_PASSWORD="${NOVNC_PASSWORD:0:8}"
|
||||
fi
|
||||
NOVNC_PASSWD_FILE="${HOME}/.vnc/passwd"
|
||||
|
||||
mkdir -p "${HOME}/.vnc"
|
||||
x11vnc -storepasswd "${NOVNC_PASSWORD}" "${NOVNC_PASSWD_FILE}" >/dev/null
|
||||
chmod 600 "${NOVNC_PASSWD_FILE}"
|
||||
x11vnc -display :1 -rfbport "${VNC_PORT}" -shared -forever -rfbauth "${NOVNC_PASSWD_FILE}" -localhost &
|
||||
x11vnc -storepasswd "${NOVNC_PASSWORD}" "${HOME}/.vnc/passwd" >/dev/null
|
||||
chmod 600 "${HOME}/.vnc/passwd"
|
||||
|
||||
x11vnc -display :1 -rfbport "${VNC_PORT}" -shared -forever -rfbauth "${HOME}/.vnc/passwd" -localhost &
|
||||
X11VNC_PID=$!
|
||||
echo "[sandbox] x11vnc started (PID: ${X11VNC_PID})"
|
||||
|
||||
websockify --web /usr/share/novnc/ "${NOVNC_PORT}" "localhost:${VNC_PORT}" &
|
||||
WEBSOCKIFY_PID=$!
|
||||
echo "[sandbox] websockify started (PID: ${WEBSOCKIFY_PID})"
|
||||
fi
|
||||
|
||||
echo "[sandbox] Container running. Monitoring all sub-processes..."
|
||||
wait -n
|
||||
|
||||
@@ -108,6 +108,7 @@ describe("ensureSandboxBrowser create args", () => {
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
BROWSER_BRIDGES.clear();
|
||||
resetNoVncObserverTokensForTests();
|
||||
dockerMocks.dockerContainerState.mockClear();
|
||||
@@ -239,4 +240,39 @@ describe("ensureSandboxBrowser create args", () => {
|
||||
const labels = collectDockerFlagValues(createArgs ?? [], "--label");
|
||||
expect(labels).toContain(`openclaw.mountFormatVersion=${SANDBOX_MOUNT_FORMAT_VERSION}`);
|
||||
});
|
||||
|
||||
it("force-removes the browser container when CDP never becomes reachable", async () => {
|
||||
vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("timeout"));
|
||||
bridgeMocks.startBrowserBridgeServer.mockImplementationOnce(async (params) => {
|
||||
await params.onEnsureAttachTarget?.({});
|
||||
return {
|
||||
server: {} as never,
|
||||
port: 19000,
|
||||
baseUrl: "http://127.0.0.1:19000",
|
||||
state: {
|
||||
server: null,
|
||||
port: 19000,
|
||||
resolved: { profiles: {} },
|
||||
profiles: new Map(),
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
const cfg = buildConfig(false);
|
||||
cfg.browser.autoStartTimeoutMs = 1;
|
||||
|
||||
await expect(
|
||||
ensureSandboxBrowser({
|
||||
scopeKey: "session:test",
|
||||
workspaceDir: "/tmp/workspace",
|
||||
agentWorkspaceDir: "/tmp/workspace",
|
||||
cfg,
|
||||
}),
|
||||
).rejects.toThrow("hung container has been forcefully removed");
|
||||
|
||||
expect(dockerMocks.execDocker).toHaveBeenCalledWith(
|
||||
["rm", "-f", expect.stringMatching(/^openclaw-sbx-browser-session-test-/)],
|
||||
{ allowFailure: true },
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -167,6 +167,7 @@ export async function ensureSandboxBrowser(params: {
|
||||
noVncPort: params.cfg.browser.noVncPort,
|
||||
headless: params.cfg.browser.headless,
|
||||
enableNoVnc: params.cfg.browser.enableNoVnc,
|
||||
autoStartTimeoutMs: params.cfg.browser.autoStartTimeoutMs,
|
||||
cdpSourceRange,
|
||||
},
|
||||
securityEpoch: SANDBOX_BROWSER_SECURITY_HASH_EPOCH,
|
||||
@@ -262,14 +263,15 @@ export async function ensureSandboxBrowser(params: {
|
||||
args.push("-e", `OPENCLAW_BROWSER_HEADLESS=${params.cfg.browser.headless ? "1" : "0"}`);
|
||||
args.push("-e", `OPENCLAW_BROWSER_ENABLE_NOVNC=${params.cfg.browser.enableNoVnc ? "1" : "0"}`);
|
||||
args.push("-e", `OPENCLAW_BROWSER_CDP_PORT=${params.cfg.browser.cdpPort}`);
|
||||
args.push(
|
||||
"-e",
|
||||
`OPENCLAW_BROWSER_AUTO_START_TIMEOUT_MS=${params.cfg.browser.autoStartTimeoutMs}`,
|
||||
);
|
||||
if (cdpSourceRange) {
|
||||
args.push("-e", `${CDP_SOURCE_RANGE_ENV_KEY}=${cdpSourceRange}`);
|
||||
}
|
||||
args.push("-e", `OPENCLAW_BROWSER_VNC_PORT=${params.cfg.browser.vncPort}`);
|
||||
args.push("-e", `OPENCLAW_BROWSER_NOVNC_PORT=${params.cfg.browser.noVncPort}`);
|
||||
// Chromium's setuid/namespace sandbox cannot work inside Docker containers
|
||||
// (PID namespace creation requires privileges Docker does not grant by default).
|
||||
// The container itself provides isolation, so --no-sandbox is safe here.
|
||||
args.push("-e", "OPENCLAW_BROWSER_NO_SANDBOX=1");
|
||||
if (noVncEnabled && noVncPassword) {
|
||||
args.push("-e", `${NOVNC_PASSWORD_ENV_KEY}=${noVncPassword}`);
|
||||
@@ -302,9 +304,6 @@ export async function ensureSandboxBrowser(params: {
|
||||
let desiredAuthToken = normalizeOptionalString(params.bridgeAuth?.token);
|
||||
let desiredAuthPassword = normalizeOptionalString(params.bridgeAuth?.password);
|
||||
if (!desiredAuthToken && !desiredAuthPassword) {
|
||||
// Always require auth for the sandbox bridge server, even if gateway auth
|
||||
// mode doesn't produce a shared secret (e.g. trusted-proxy).
|
||||
// Keep it stable across calls by reusing the existing bridge auth.
|
||||
desiredAuthToken = existing?.authToken;
|
||||
desiredAuthPassword = existing?.authPassword;
|
||||
if (!desiredAuthToken && !desiredAuthPassword) {
|
||||
@@ -349,8 +348,9 @@ export async function ensureSandboxBrowser(params: {
|
||||
timeoutMs: params.cfg.browser.autoStartTimeoutMs,
|
||||
});
|
||||
if (!ok) {
|
||||
await execDocker(["rm", "-f", containerName], { allowFailure: true });
|
||||
throw new Error(
|
||||
`Sandbox browser CDP did not become reachable on 127.0.0.1:${mappedCdp} within ${params.cfg.browser.autoStartTimeoutMs}ms.`,
|
||||
`Sandbox browser CDP did not become reachable on 127.0.0.1:${mappedCdp} within ${params.cfg.browser.autoStartTimeoutMs}ms. The hung container has been forcefully removed.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,6 +118,7 @@ describe("computeSandboxBrowserConfigHash", () => {
|
||||
noVncPort: 6080,
|
||||
headless: false,
|
||||
enableNoVnc: true,
|
||||
autoStartTimeoutMs: 12000,
|
||||
},
|
||||
securityEpoch: "epoch-v1",
|
||||
workspaceAccess: "rw" as const,
|
||||
@@ -150,6 +151,7 @@ describe("computeSandboxBrowserConfigHash", () => {
|
||||
noVncPort: 6080,
|
||||
headless: false,
|
||||
enableNoVnc: true,
|
||||
autoStartTimeoutMs: 12000,
|
||||
},
|
||||
workspaceAccess: "rw" as const,
|
||||
workspaceDir: "/tmp/workspace",
|
||||
@@ -176,6 +178,7 @@ describe("computeSandboxBrowserConfigHash", () => {
|
||||
noVncPort: 6080,
|
||||
headless: false,
|
||||
enableNoVnc: true,
|
||||
autoStartTimeoutMs: 12000,
|
||||
},
|
||||
securityEpoch: "epoch-v1",
|
||||
workspaceAccess: "rw" as const,
|
||||
@@ -204,6 +207,7 @@ describe("computeSandboxBrowserConfigHash", () => {
|
||||
noVncPort: 6080,
|
||||
headless: false,
|
||||
enableNoVnc: true,
|
||||
autoStartTimeoutMs: 12000,
|
||||
},
|
||||
securityEpoch: "epoch-v1",
|
||||
workspaceAccess: "rw" as const,
|
||||
|
||||
@@ -13,7 +13,13 @@ type SandboxBrowserHashInput = {
|
||||
docker: SandboxDockerConfig;
|
||||
browser: Pick<
|
||||
SandboxBrowserConfig,
|
||||
"cdpPort" | "cdpSourceRange" | "vncPort" | "noVncPort" | "headless" | "enableNoVnc"
|
||||
| "cdpPort"
|
||||
| "cdpSourceRange"
|
||||
| "vncPort"
|
||||
| "noVncPort"
|
||||
| "headless"
|
||||
| "enableNoVnc"
|
||||
| "autoStartTimeoutMs"
|
||||
>;
|
||||
securityEpoch: string;
|
||||
workspaceAccess: SandboxWorkspaceAccess;
|
||||
|
||||
Reference in New Issue
Block a user