fix(browser): circuit-break managed launch failures

This commit is contained in:
Peter Steinberger
2026-04-27 09:57:52 +01:00
parent 36a936af66
commit b4b21cbc93
6 changed files with 157 additions and 8 deletions

View File

@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
- CLI/plugins: let config-gated bundled plugins install without persisting invalid placeholder config entries, so install/uninstall sweeps can cover plugins such as memory-lancedb before the user configures credentials. Thanks @vincentkoc.
- Agents/sessions: acquire the session write lock only after cold bootstrap, plugin, and tool setup so fallback runs are not blocked by stalled pre-model startup work. Thanks @codex.
- Browser/plugins: auto-start the bundled browser plugin when root `browser` config is present, including restrictive plugin allowlists, and ignore stale persisted plugin registries whose package paths no longer exist. Thanks @codex.
- Browser: circuit-break repeated managed Chrome launch failures per profile so browser requests stop spawning Chromium indefinitely when CDP cannot start. Fixes #64271. Thanks @TheophilusChinomona.
- Gateway/models: skip external OpenRouter and LiteLLM pricing refreshes for local/self-hosted model endpoints so startup does not wait on remote pricing catalogs for local-only Ollama, vLLM, and compatible providers. Thanks @codex.
- CLI/plugins: stop security-blocked plugin installs from retrying as hook packs, so normal plugin packages report the scanner failure without a misleading "not a valid hook pack" follow-up. Fixes #61175; supersedes #64102. Thanks @KonsultDigital and @ziyincody.
- Agents/Anthropic: strip stale trailing assistant prefill turns from outbound replay so context-engine short circuits cannot send unsupported assistant-prefill payloads to provider APIs. Fixes #72556. Thanks @Veda-openclaw.

View File

@@ -203,6 +203,11 @@ Browser settings live in `~/.openclaw/openclaw.json`.
Raise these on Raspberry Pi, low-end VPS, or older hardware where Chromium
starts slowly. Values must be positive integers up to `120000` ms; invalid
config values are rejected.
- Repeated managed Chrome launch/readiness failures are circuit-broken per
profile. After 3 consecutive failures, OpenClaw pauses new launch attempts
for that profile (30 seconds at first, doubling with each further failure up
to 5 minutes) instead of spawning Chromium on every browser tool call. Fix
the startup problem, disable the browser if it is not needed, or restart the
Gateway after repair.
- `actionTimeoutMs` is the default budget for browser `act` requests when the caller does not pass `timeoutMs`. The client transport adds a small slack window so long waits can finish instead of timing out at the HTTP boundary.
- `tabCleanup` is best-effort cleanup for tabs opened by primary-agent browser sessions. Subagent, cron, and ACP lifecycle cleanup still closes their explicit tracked tabs at session end; primary sessions keep active tabs reusable, then close idle or excess tracked tabs in the background.

View File

@@ -56,6 +56,10 @@ type BrowserEnsureOptions = {
headless?: boolean;
};
// Number of consecutive managed launch failures at which a cooldown starts being applied.
const MANAGED_LAUNCH_FAILURE_THRESHOLD = 3;
// Cooldown applied when the threshold is first reached; doubled for each failure beyond it.
const MANAGED_LAUNCH_COOLDOWN_BASE_MS = 30_000;
// Upper bound on the cooldown, regardless of how many failures have accumulated (5 minutes).
const MANAGED_LAUNCH_COOLDOWN_MAX_MS = 5 * 60_000;
function launchOptionsForEnsure(options?: BrowserEnsureOptions) {
return typeof options?.headless === "boolean"
? { headlessOverride: options.headless }
@@ -81,6 +85,51 @@ function formatLocalPortOwnershipHint(profile: ResolvedBrowserProfile): string {
);
}
/**
 * Converts an arbitrary thrown value into a trimmed, non-empty message.
 *
 * - `Error` instances and strings contribute their text.
 * - Other objects contribute a string `message` property when they have one,
 *   otherwise their JSON form (instead of the useless "[object Object]").
 * - `null`, `undefined`, blank text, and values that cannot be serialized fall
 *   back to a generic message.
 *
 * This function never throws: it runs inside failure-handling paths, where a
 * secondary exception would hide the original error.
 *
 * @param err - The value caught from a failed operation.
 * @returns A non-empty, whitespace-trimmed description of the failure.
 */
function normalizeFailureMessage(err: unknown): string {
  const fallback = "unknown browser launch failure";
  let raw = "";
  if (err instanceof Error) {
    raw = err.message;
  } else if (typeof err === "string") {
    raw = err;
  } else if (typeof err === "object" && err !== null) {
    const message = (err as { message?: unknown }).message;
    if (typeof message === "string") {
      raw = message;
    } else {
      try {
        // JSON.stringify copes with null-prototype objects, where String() would throw.
        const json: unknown = JSON.stringify(err);
        raw = typeof json === "string" ? json : "";
      } catch {
        // Circular structures, BigInt members, or a throwing toJSON: use the fallback.
        raw = "";
      }
    }
  } else if (err !== null && err !== undefined) {
    // Remaining primitives (number, boolean, bigint, symbol) and functions stringify safely.
    raw = String(err);
  }
  const trimmed = raw.trim();
  return trimmed || fallback;
}
/**
 * Clears the recorded managed-launch failure state for a profile, so the
 * consecutive-failure count and any cooldown start from scratch.
 *
 * @param profileState - Runtime state of the profile whose failure record is cleared.
 */
function resetManagedLaunchFailure(profileState: ProfileRuntimeState): void {
profileState.managedLaunchFailure = undefined;
}
/**
 * Records one more consecutive managed-launch failure on the profile state.
 *
 * Below the failure threshold no cooldown is stored. From the threshold on,
 * a cooldown deadline is stored: the base cooldown at the threshold, doubled
 * for each additional failure, and capped at the maximum cooldown.
 *
 * @param profileState - Runtime state of the profile that failed to launch.
 * @param err - The value thrown by the failed launch or readiness check.
 */
function recordManagedLaunchFailure(profileState: ProfileRuntimeState, err: unknown): void {
  const failureCount = (profileState.managedLaunchFailure?.consecutiveFailures ?? 0) + 1;

  let cooldownMs = 0;
  if (failureCount >= MANAGED_LAUNCH_FAILURE_THRESHOLD) {
    // Number of failures past the threshold; never negative inside this branch.
    const doublings = failureCount - MANAGED_LAUNCH_FAILURE_THRESHOLD;
    const uncapped = MANAGED_LAUNCH_COOLDOWN_BASE_MS * 2 ** doublings;
    cooldownMs = Math.min(MANAGED_LAUNCH_COOLDOWN_MAX_MS, uncapped);
  }

  const recordedAt = Date.now();
  const lastError = normalizeFailureMessage(err);

  // The cooldownUntil key is only present when a cooldown actually applies.
  profileState.managedLaunchFailure =
    cooldownMs > 0
      ? {
          consecutiveFailures: failureCount,
          lastFailureAt: recordedAt,
          cooldownUntil: recordedAt + cooldownMs,
          lastError,
        }
      : {
          consecutiveFailures: failureCount,
          lastFailureAt: recordedAt,
          lastError,
        };
}
/**
 * Throws when the profile is still inside a launch cooldown window.
 *
 * Returns normally when there is no failure record, when the failure count is
 * below the threshold, or when the stored cooldown deadline has passed.
 *
 * @param profileName - Profile name used in the error message.
 * @param profileState - Runtime state holding the failure record to inspect.
 * @throws BrowserProfileUnavailableError while the cooldown has time remaining.
 */
function assertManagedLaunchNotCoolingDown(profileName: string, profileState: ProfileRuntimeState) {
  const record = profileState.managedLaunchFailure;
  if (!record) {
    return;
  }
  if (record.consecutiveFailures < MANAGED_LAUNCH_FAILURE_THRESHOLD) {
    return;
  }

  // A missing deadline is treated as already expired.
  const msLeft = (record.cooldownUntil ?? 0) - Date.now();
  if (msLeft <= 0) {
    return;
  }

  // Round up and never report less than one second.
  const retrySeconds = Math.max(1, Math.ceil(msLeft / 1000));
  const message =
    `Browser launch for profile "${profileName}" is cooling down after ${record.consecutiveFailures} consecutive managed Chrome launch failures. ` +
    `Retry in ${retrySeconds}s after fixing Chrome startup, or set browser.enabled=false if the browser tool is not needed. ` +
    `Last error: ${record.lastError}`;
  throw new BrowserProfileUnavailableError(message);
}
export function createProfileAvailability({
opts,
profile,
@@ -189,6 +238,7 @@ export function createProfileAvailability({
profileState.lastTargetId = null;
const previousProfile = reconcile.previousProfile;
resetManagedLaunchFailure(profileState);
if (profileState.running) {
await stopOpenClawChrome(profileState.running).catch(() => {});
setProfileRunning(null);
@@ -243,7 +293,19 @@ export function createProfileAvailability({
throw new BrowserProfileUnavailableError(formatChromeMcpAttachFailure(lastError));
};
let inflightEnsureBrowserAvailable: { key: string; promise: Promise<void> } | null = null;
// Launches managed Chrome for this profile behind the launch circuit breaker:
// refuses to launch during a cooldown and records any launch error before rethrowing it.
const launchManagedChrome = async (
  profileState: ProfileRuntimeState,
  current: BrowserServerState,
  launchOptions: ReturnType<typeof launchOptionsForEnsure>,
) => {
  // Checked outside the try block so a cooldown rejection is not itself recorded as a failure.
  assertManagedLaunchNotCoolingDown(profile.name, profileState);
  try {
    const chrome = await launchOpenClawChrome(current.resolved, profile, launchOptions);
    return chrome;
  } catch (launchError) {
    recordManagedLaunchFailure(profileState, launchError);
    throw launchError;
  }
};
const ensureBrowserAvailableOnce = async (options?: BrowserEnsureOptions): Promise<void> => {
await reconcileProfileRuntime();
@@ -280,6 +342,7 @@ export function createProfileAvailability({
(await isHttpReachable(PROFILE_ATTACH_RETRY_TIMEOUT_MS)) &&
(await isReachable(PROFILE_ATTACH_RETRY_TIMEOUT_MS))
) {
resetManagedLaunchFailure(profileState);
return;
}
}
@@ -290,13 +353,15 @@ export function createProfileAvailability({
: `Browser attachOnly is enabled and profile "${profile.name}" is not running.`,
);
}
const launched = await launchOpenClawChrome(current.resolved, profile, launchOptions);
const launched = await launchManagedChrome(profileState, current, launchOptions);
attachRunning(launched);
try {
await waitForCdpReadyAfterLaunch();
resetManagedLaunchFailure(profileState);
} catch (err) {
await stopOpenClawChrome(launched).catch(() => {});
setProfileRunning(null);
recordManagedLaunchFailure(profileState, err);
throw err;
}
return;
@@ -304,6 +369,7 @@ export function createProfileAvailability({
// Port is reachable - check if we own it.
if (await isReachable()) {
resetManagedLaunchFailure(profileState);
return;
}
@@ -339,22 +405,26 @@ export function createProfileAvailability({
await stopOpenClawChrome(profileState.running);
setProfileRunning(null);
const relaunched = await launchOpenClawChrome(current.resolved, profile, launchOptions);
const relaunched = await launchManagedChrome(profileState, current, launchOptions);
attachRunning(relaunched);
if (!(await isReachable(PROFILE_POST_RESTART_WS_TIMEOUT_MS))) {
throw new Error(
const err = new Error(
`Chrome CDP websocket for profile "${profile.name}" is not reachable after restart. ${await describeCdpFailure(
PROFILE_POST_RESTART_WS_TIMEOUT_MS,
)}`,
);
recordManagedLaunchFailure(profileState, err);
throw err;
}
resetManagedLaunchFailure(profileState);
};
const ensureBrowserAvailable = async (options?: BrowserEnsureOptions): Promise<void> => {
const key = ensureOptionsKey(options);
const profileState = getProfileState();
for (;;) {
const current = inflightEnsureBrowserAvailable;
const current = profileState.ensureBrowserAvailable;
if (!current) {
break;
}
@@ -364,11 +434,11 @@ export function createProfileAvailability({
await current.promise.catch(() => {});
}
const promise = ensureBrowserAvailableOnce(options).finally(() => {
if (inflightEnsureBrowserAvailable?.promise === promise) {
inflightEnsureBrowserAvailable = null;
if (profileState.ensureBrowserAvailable?.promise === promise) {
profileState.ensureBrowserAvailable = null;
}
});
inflightEnsureBrowserAvailable = { key, promise };
profileState.ensureBrowserAvailable = { key, promise };
return promise;
};
@@ -380,6 +450,7 @@ export function createProfileAvailability({
return { stopped };
}
const profileState = getProfileState();
resetManagedLaunchFailure(profileState);
if (!profileState.running) {
const idleStop = resolveIdleProfileStopOutcome(profile);
if (idleStop.closePlaywright) {

View File

@@ -121,6 +121,23 @@ describe("browser server-context ensureBrowserAvailable", () => {
expect(stopOpenClawChrome).not.toHaveBeenCalled();
});
// Two route contexts built over the same `state` object start ensureBrowserAvailable()
// concurrently; the expectation is that only one Chrome launch happens.
it("deduplicates concurrent lazy-start calls across fresh profile contexts", async () => {
const { launchOpenClawChrome, stopOpenClawChrome, isChromeCdpReady, state } =
setupEnsureBrowserAvailableHarness();
isChromeCdpReady.mockResolvedValue(true);
// NOTE(review): 457 is presumably the fake Chrome pid used by the helper; confirm in mockLaunchedChrome.
mockLaunchedChrome(launchOpenClawChrome, 457);
const firstCtx = createBrowserRouteContext({ getState: () => state });
const secondCtx = createBrowserRouteContext({ getState: () => state });
// Neither call is awaited yet, so both are in flight at the same time.
const first = firstCtx.forProfile("openclaw").ensureBrowserAvailable();
const second = secondCtx.forProfile("openclaw").ensureBrowserAvailable();
// Assumes the harness installs fake timers; advancing them lets pending waits settle.
await vi.advanceTimersByTimeAsync(100);
await expect(Promise.all([first, second])).resolves.toEqual([undefined, undefined]);
expect(launchOpenClawChrome).toHaveBeenCalledTimes(1);
expect(stopOpenClawChrome).not.toHaveBeenCalled();
});
it("passes request-local headless override to initial launch", async () => {
const { launchOpenClawChrome, stopOpenClawChrome, isChromeCdpReady, profile } =
setupEnsureBrowserAvailableHarness();
@@ -203,6 +220,52 @@ describe("browser server-context ensureBrowserAvailable", () => {
expect(stopOpenClawChrome).not.toHaveBeenCalled();
});
// After three rejected launches (each from a fresh route context over the same `state`),
// further calls must fail with the cooldown message and must not launch again.
it("cools down repeated managed Chrome launch failures across route contexts", async () => {
const { launchOpenClawChrome, stopOpenClawChrome, isChromeCdpReady, state } =
setupEnsureBrowserAvailableHarness();
isChromeCdpReady.mockResolvedValue(true);
// Every launch attempt rejects with the same error.
launchOpenClawChrome.mockRejectedValue(new Error("Failed to start Chrome CDP"));
// Three failures, each surfacing the original launch error to the caller.
for (let attempt = 0; attempt < 3; attempt += 1) {
const ctx = createBrowserRouteContext({ getState: () => state });
await expect(ctx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
"Failed to start Chrome CDP",
);
}
const cooledDownCtx = createBrowserRouteContext({ getState: () => state });
// Fourth and fifth calls: rejected with the cooldown message instead of the launch error.
await expect(cooledDownCtx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
'Browser launch for profile "openclaw" is cooling down after 3 consecutive managed Chrome launch failures.',
);
await expect(cooledDownCtx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
"set browser.enabled=false if the browser tool is not needed",
);
// The launch count stays at 3: the cooled-down calls never reached the launcher.
expect(launchOpenClawChrome).toHaveBeenCalledTimes(3);
expect(stopOpenClawChrome).not.toHaveBeenCalled();
});
// Once the cooldown window has elapsed, the next call is allowed to reach the launcher again.
it("allows one managed Chrome launch attempt after the cooldown expires", async () => {
const { launchOpenClawChrome, isChromeCdpReady, state } = setupEnsureBrowserAvailableHarness();
isChromeCdpReady.mockResolvedValue(true);
launchOpenClawChrome.mockRejectedValue(new Error("Failed to start Chrome CDP"));
// Three consecutive failures, each from a fresh route context over the same `state`.
for (let attempt = 0; attempt < 3; attempt += 1) {
const ctx = createBrowserRouteContext({ getState: () => state });
await expect(ctx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
"Failed to start Chrome CDP",
);
}
// 30_000 ms equals MANAGED_LAUNCH_COOLDOWN_BASE_MS; assumes the harness uses fake timers
// that also drive Date.now() - TODO confirm in the harness setup.
await vi.advanceTimersByTimeAsync(30_000);
const retryCtx = createBrowserRouteContext({ getState: () => state });
// The retry surfaces the raw launch error, not the cooldown message.
await expect(retryCtx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
"Failed to start Chrome CDP",
);
// Fourth launcher call confirms the attempt was not blocked.
expect(launchOpenClawChrome).toHaveBeenCalledTimes(4);
});
it("reuses a pre-existing loopback browser after an initial short probe miss", async () => {
const { launchOpenClawChrome, stopOpenClawChrome, isChromeCdpReady, profile, state } =
setupEnsureBrowserAvailableHarness();

View File

@@ -35,6 +35,8 @@ export function createProfileResetOps({
const userDataDir = resolveOpenClawUserDataDir(profile.name);
const profileState = getProfileState();
profileState.managedLaunchFailure = undefined;
profileState.ensureBrowserAvailable = null;
const httpReachable = await isHttpReachable(300);
if (httpReachable && !profileState.running) {
// Port in use but not by us - kill it.

View File

@@ -11,6 +11,13 @@ export type { BrowserTab };
export type ProfileRuntimeState = {
profile: ResolvedBrowserProfile;
running: RunningChrome | null;
ensureBrowserAvailable?: { key: string; promise: Promise<void> } | null;
managedLaunchFailure?: {
consecutiveFailures: number;
lastFailureAt: number;
cooldownUntil?: number;
lastError: string;
};
/** Sticky tab selection when callers omit targetId (keeps snapshot+act consistent). */
lastTargetId?: string | null;
/** Stable, user-facing tab aliases scoped to this profile runtime. */