mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:10:45 +00:00
fix(browser): circuit-break managed launch failures
This commit is contained in:
@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
|
||||
- CLI/plugins: let config-gated bundled plugins install without persisting invalid placeholder config entries, so install/uninstall sweeps can cover plugins such as memory-lancedb before the user configures credentials. Thanks @vincentkoc.
|
||||
- Agents/sessions: acquire the session write lock only after cold bootstrap, plugin, and tool setup so fallback runs are not blocked by stalled pre-model startup work. Thanks @codex.
|
||||
- Browser/plugins: auto-start the bundled browser plugin when root `browser` config is present, including restrictive plugin allowlists, and ignore stale persisted plugin registries whose package paths no longer exist. Thanks @codex.
|
||||
- Browser: circuit-break repeated managed Chrome launch failures per profile so browser requests stop spawning Chromium indefinitely when CDP cannot start. Fixes #64271. Thanks @TheophilusChinomona.
|
||||
- Gateway/models: skip external OpenRouter and LiteLLM pricing refreshes for local/self-hosted model endpoints so startup does not wait on remote pricing catalogs for local-only Ollama, vLLM, and compatible providers. Thanks @codex.
|
||||
- CLI/plugins: stop security-blocked plugin installs from retrying as hook packs, so normal plugin packages report the scanner failure without a misleading "not a valid hook pack" follow-up. Fixes #61175; supersedes #64102. Thanks @KonsultDigital and @ziyincody.
|
||||
- Agents/Anthropic: strip stale trailing assistant prefill turns from outbound replay so context-engine short circuits cannot send unsupported assistant-prefill payloads to provider APIs. Fixes #72556. Thanks @Veda-openclaw.
|
||||
|
||||
@@ -203,6 +203,11 @@ Browser settings live in `~/.openclaw/openclaw.json`.
|
||||
Raise these on Raspberry Pi, low-end VPS, or older hardware where Chromium
|
||||
starts slowly. Values must be positive integers up to `120000` ms; invalid
|
||||
config values are rejected.
|
||||
- Repeated managed Chrome launch/readiness failures are circuit-broken per
|
||||
profile. After three consecutive failures, OpenClaw pauses new launch
|
||||
attempts briefly instead of spawning Chromium on every browser tool call. Fix
|
||||
the startup problem, disable the browser if it is not needed, or restart the
|
||||
Gateway after repair.
|
||||
- `actionTimeoutMs` is the default budget for browser `act` requests when the caller does not pass `timeoutMs`. The client transport adds a small slack window so long waits can finish instead of timing out at the HTTP boundary.
|
||||
- `tabCleanup` is best-effort cleanup for tabs opened by primary-agent browser sessions. Subagent, cron, and ACP lifecycle cleanup still closes their explicit tracked tabs at session end; primary sessions keep active tabs reusable, then close idle or excess tracked tabs in the background.
|
||||
|
||||
|
||||
@@ -56,6 +56,10 @@ type BrowserEnsureOptions = {
|
||||
headless?: boolean;
|
||||
};
|
||||
|
||||
/** Consecutive managed launch failures after which new launches for a profile are paused. */
const MANAGED_LAUNCH_FAILURE_THRESHOLD = 3;
/** Cooldown used when the threshold is first reached; it doubles with each further failure. */
const MANAGED_LAUNCH_COOLDOWN_BASE_MS = 30 * 1_000;
/** Upper bound for the launch cooldown (five minutes). */
const MANAGED_LAUNCH_COOLDOWN_MAX_MS = 5 * 60 * 1_000;
|
||||
|
||||
function launchOptionsForEnsure(options?: BrowserEnsureOptions) {
|
||||
return typeof options?.headless === "boolean"
|
||||
? { headlessOverride: options.headless }
|
||||
@@ -81,6 +85,51 @@ function formatLocalPortOwnershipHint(profile: ResolvedBrowserProfile): string {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Converts an arbitrary thrown value into a non-empty, trimmed message that
 * can be stored and embedded in later error text.
 *
 * Resolution order:
 * - an object (including `Error` instances) with a string `message`: that message;
 * - anything else: its `String(...)` coercion.
 *
 * This function never throws: it is called while handling another failure, so
 * a coercion error here would mask the original launch error.
 *
 * @param err - The value caught from a failed launch or readiness attempt.
 * @returns The trimmed message, or "unknown browser launch failure" when no
 *   usable text can be extracted.
 */
function normalizeFailureMessage(err: unknown): string {
  const fallback = "unknown browser launch failure";
  let raw = "";
  try {
    const message =
      typeof err === "object" && err !== null ? (err as { message?: unknown }).message : undefined;
    raw = typeof message === "string" ? message : String(err);
  } catch {
    // String() throws for null-prototype objects and for hostile toString/getters.
    raw = "";
  }
  return raw.trim() || fallback;
}
|
||||
|
||||
/**
 * Forgets the recorded managed launch failures for a profile. Because the
 * cooldown check reads the same record, this also lifts any active cooldown.
 *
 * @param profileState - Runtime state of the profile whose record is cleared.
 */
function resetManagedLaunchFailure(profileState: ProfileRuntimeState): void {
  profileState.managedLaunchFailure = undefined;
}
|
||||
|
||||
/**
 * Registers one more consecutive managed launch failure for a profile.
 *
 * Below MANAGED_LAUNCH_FAILURE_THRESHOLD only the streak, timestamp and
 * message are stored. From the threshold onwards a `cooldownUntil` deadline is
 * stored too: MANAGED_LAUNCH_COOLDOWN_BASE_MS at the threshold, doubling for
 * every additional failure and capped at MANAGED_LAUNCH_COOLDOWN_MAX_MS.
 *
 * @param profileState - Runtime state of the profile that failed to launch.
 * @param err - The caught failure; stored in normalized string form.
 */
function recordManagedLaunchFailure(profileState: ProfileRuntimeState, err: unknown): void {
  const streak = (profileState.managedLaunchFailure?.consecutiveFailures ?? 0) + 1;

  let cooldownMs = 0;
  if (streak >= MANAGED_LAUNCH_FAILURE_THRESHOLD) {
    // Number of failures beyond the threshold decides how often the base is doubled.
    const doublings = Math.max(0, streak - MANAGED_LAUNCH_FAILURE_THRESHOLD);
    const uncapped = MANAGED_LAUNCH_COOLDOWN_BASE_MS * 2 ** doublings;
    cooldownMs = Math.min(MANAGED_LAUNCH_COOLDOWN_MAX_MS, uncapped);
  }

  const failedAt = Date.now();
  const deadline = cooldownMs > 0 ? { cooldownUntil: failedAt + cooldownMs } : {};
  profileState.managedLaunchFailure = {
    consecutiveFailures: streak,
    lastFailureAt: failedAt,
    ...deadline,
    lastError: normalizeFailureMessage(err),
  };
}
|
||||
|
||||
/**
 * Guards a managed Chrome launch with the per-profile circuit breaker.
 *
 * Returns silently when there is no failure record, when the failure streak is
 * below MANAGED_LAUNCH_FAILURE_THRESHOLD, or when the stored cooldown deadline
 * has been reached or is missing.
 *
 * @param profileName - Profile name, used only in the error text.
 * @param profileState - Runtime state holding the failure record.
 * @throws BrowserProfileUnavailableError while the cooldown is still running;
 *   the message includes the remaining wait in whole seconds (at least 1) and
 *   the last recorded error.
 */
function assertManagedLaunchNotCoolingDown(profileName: string, profileState: ProfileRuntimeState) {
  const record = profileState.managedLaunchFailure;
  if (!record || record.consecutiveFailures < MANAGED_LAUNCH_FAILURE_THRESHOLD) {
    return;
  }

  const msLeft = (record.cooldownUntil ?? 0) - Date.now();
  if (msLeft <= 0) {
    return;
  }

  // Round up so the hint never reads "Retry in 0s".
  const retrySeconds = Math.max(1, Math.ceil(msLeft / 1000));
  const sentences = [
    `Browser launch for profile "${profileName}" is cooling down after ${record.consecutiveFailures} consecutive managed Chrome launch failures.`,
    `Retry in ${retrySeconds}s after fixing Chrome startup, or set browser.enabled=false if the browser tool is not needed.`,
    `Last error: ${record.lastError}`,
  ];
  throw new BrowserProfileUnavailableError(sentences.join(" "));
}
|
||||
|
||||
export function createProfileAvailability({
|
||||
opts,
|
||||
profile,
|
||||
@@ -189,6 +238,7 @@ export function createProfileAvailability({
|
||||
profileState.lastTargetId = null;
|
||||
|
||||
const previousProfile = reconcile.previousProfile;
|
||||
resetManagedLaunchFailure(profileState);
|
||||
if (profileState.running) {
|
||||
await stopOpenClawChrome(profileState.running).catch(() => {});
|
||||
setProfileRunning(null);
|
||||
@@ -243,7 +293,19 @@ export function createProfileAvailability({
|
||||
throw new BrowserProfileUnavailableError(formatChromeMcpAttachFailure(lastError));
|
||||
};
|
||||
|
||||
let inflightEnsureBrowserAvailable: { key: string; promise: Promise<void> } | null = null;
|
||||
/**
 * Launches managed Chrome for this profile behind the launch circuit breaker.
 *
 * Throws immediately while the profile is cooling down. A failed launch is
 * recorded against the profile and then rethrown unchanged. A successful
 * launch does not clear the failure record here.
 */
const launchManagedChrome = async (
  profileState: ProfileRuntimeState,
  current: BrowserServerState,
  launchOptions: ReturnType<typeof launchOptionsForEnsure>,
) => {
  assertManagedLaunchNotCoolingDown(profile.name, profileState);
  try {
    const chrome = await launchOpenClawChrome(current.resolved, profile, launchOptions);
    return chrome;
  } catch (launchError) {
    // Count the failure toward the cooldown before surfacing it to the caller.
    recordManagedLaunchFailure(profileState, launchError);
    throw launchError;
  }
};
|
||||
|
||||
const ensureBrowserAvailableOnce = async (options?: BrowserEnsureOptions): Promise<void> => {
|
||||
await reconcileProfileRuntime();
|
||||
@@ -280,6 +342,7 @@ export function createProfileAvailability({
|
||||
(await isHttpReachable(PROFILE_ATTACH_RETRY_TIMEOUT_MS)) &&
|
||||
(await isReachable(PROFILE_ATTACH_RETRY_TIMEOUT_MS))
|
||||
) {
|
||||
resetManagedLaunchFailure(profileState);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -290,13 +353,15 @@ export function createProfileAvailability({
|
||||
: `Browser attachOnly is enabled and profile "${profile.name}" is not running.`,
|
||||
);
|
||||
}
|
||||
const launched = await launchOpenClawChrome(current.resolved, profile, launchOptions);
|
||||
const launched = await launchManagedChrome(profileState, current, launchOptions);
|
||||
attachRunning(launched);
|
||||
try {
|
||||
await waitForCdpReadyAfterLaunch();
|
||||
resetManagedLaunchFailure(profileState);
|
||||
} catch (err) {
|
||||
await stopOpenClawChrome(launched).catch(() => {});
|
||||
setProfileRunning(null);
|
||||
recordManagedLaunchFailure(profileState, err);
|
||||
throw err;
|
||||
}
|
||||
return;
|
||||
@@ -304,6 +369,7 @@ export function createProfileAvailability({
|
||||
|
||||
// Port is reachable - check if we own it.
|
||||
if (await isReachable()) {
|
||||
resetManagedLaunchFailure(profileState);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -339,22 +405,26 @@ export function createProfileAvailability({
|
||||
await stopOpenClawChrome(profileState.running);
|
||||
setProfileRunning(null);
|
||||
|
||||
const relaunched = await launchOpenClawChrome(current.resolved, profile, launchOptions);
|
||||
const relaunched = await launchManagedChrome(profileState, current, launchOptions);
|
||||
attachRunning(relaunched);
|
||||
|
||||
if (!(await isReachable(PROFILE_POST_RESTART_WS_TIMEOUT_MS))) {
|
||||
throw new Error(
|
||||
const err = new Error(
|
||||
`Chrome CDP websocket for profile "${profile.name}" is not reachable after restart. ${await describeCdpFailure(
|
||||
PROFILE_POST_RESTART_WS_TIMEOUT_MS,
|
||||
)}`,
|
||||
);
|
||||
recordManagedLaunchFailure(profileState, err);
|
||||
throw err;
|
||||
}
|
||||
resetManagedLaunchFailure(profileState);
|
||||
};
|
||||
|
||||
const ensureBrowserAvailable = async (options?: BrowserEnsureOptions): Promise<void> => {
|
||||
const key = ensureOptionsKey(options);
|
||||
const profileState = getProfileState();
|
||||
for (;;) {
|
||||
const current = inflightEnsureBrowserAvailable;
|
||||
const current = profileState.ensureBrowserAvailable;
|
||||
if (!current) {
|
||||
break;
|
||||
}
|
||||
@@ -364,11 +434,11 @@ export function createProfileAvailability({
|
||||
await current.promise.catch(() => {});
|
||||
}
|
||||
const promise = ensureBrowserAvailableOnce(options).finally(() => {
|
||||
if (inflightEnsureBrowserAvailable?.promise === promise) {
|
||||
inflightEnsureBrowserAvailable = null;
|
||||
if (profileState.ensureBrowserAvailable?.promise === promise) {
|
||||
profileState.ensureBrowserAvailable = null;
|
||||
}
|
||||
});
|
||||
inflightEnsureBrowserAvailable = { key, promise };
|
||||
profileState.ensureBrowserAvailable = { key, promise };
|
||||
return promise;
|
||||
};
|
||||
|
||||
@@ -380,6 +450,7 @@ export function createProfileAvailability({
|
||||
return { stopped };
|
||||
}
|
||||
const profileState = getProfileState();
|
||||
resetManagedLaunchFailure(profileState);
|
||||
if (!profileState.running) {
|
||||
const idleStop = resolveIdleProfileStopOutcome(profile);
|
||||
if (idleStop.closePlaywright) {
|
||||
|
||||
@@ -121,6 +121,23 @@ describe("browser server-context ensureBrowserAvailable", () => {
|
||||
expect(stopOpenClawChrome).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("deduplicates concurrent lazy-start calls across fresh profile contexts", async () => {
  const {
    launchOpenClawChrome: launchSpy,
    stopOpenClawChrome: stopSpy,
    isChromeCdpReady: cdpReadySpy,
    state: sharedState,
  } = setupEnsureBrowserAvailableHarness();
  cdpReadySpy.mockResolvedValue(true);
  mockLaunchedChrome(launchSpy, 457);

  // Two independent route contexts over the same server state, started back to back.
  const contexts = [0, 1].map(() => createBrowserRouteContext({ getState: () => sharedState }));
  const pending = contexts.map((ctx) => ctx.forProfile("openclaw").ensureBrowserAvailable());
  await vi.advanceTimersByTimeAsync(100);
  await expect(Promise.all(pending)).resolves.toEqual([undefined, undefined]);

  // Only one Chrome launch may happen for both callers.
  expect(launchSpy).toHaveBeenCalledTimes(1);
  expect(stopSpy).not.toHaveBeenCalled();
});
|
||||
|
||||
it("passes request-local headless override to initial launch", async () => {
|
||||
const { launchOpenClawChrome, stopOpenClawChrome, isChromeCdpReady, profile } =
|
||||
setupEnsureBrowserAvailableHarness();
|
||||
@@ -203,6 +220,52 @@ describe("browser server-context ensureBrowserAvailable", () => {
|
||||
expect(stopOpenClawChrome).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("cools down repeated managed Chrome launch failures across route contexts", async () => {
  const {
    launchOpenClawChrome: launchSpy,
    stopOpenClawChrome: stopSpy,
    isChromeCdpReady: cdpReadySpy,
    state: sharedState,
  } = setupEnsureBrowserAvailableHarness();
  const launchFailure = "Failed to start Chrome CDP";
  cdpReadySpy.mockResolvedValue(true);
  launchSpy.mockRejectedValue(new Error(launchFailure));

  // Each failing attempt uses a fresh route context over the same state.
  for (let failuresLeft = 3; failuresLeft > 0; failuresLeft -= 1) {
    const failingCtx = createBrowserRouteContext({ getState: () => sharedState });
    await expect(failingCtx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
      launchFailure,
    );
  }

  // The next requests are rejected by the cooldown without launching Chrome.
  const cooledDownCtx = createBrowserRouteContext({ getState: () => sharedState });
  await expect(cooledDownCtx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
    'Browser launch for profile "openclaw" is cooling down after 3 consecutive managed Chrome launch failures.',
  );
  await expect(cooledDownCtx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
    "set browser.enabled=false if the browser tool is not needed",
  );

  expect(launchSpy).toHaveBeenCalledTimes(3);
  expect(stopSpy).not.toHaveBeenCalled();
});
|
||||
|
||||
it("allows one managed Chrome launch attempt after the cooldown expires", async () => {
  const {
    launchOpenClawChrome: launchSpy,
    isChromeCdpReady: cdpReadySpy,
    state: sharedState,
  } = setupEnsureBrowserAvailableHarness();
  const launchFailure = "Failed to start Chrome CDP";
  cdpReadySpy.mockResolvedValue(true);
  launchSpy.mockRejectedValue(new Error(launchFailure));

  // Three failures trip the cooldown.
  for (let failuresLeft = 3; failuresLeft > 0; failuresLeft -= 1) {
    const failingCtx = createBrowserRouteContext({ getState: () => sharedState });
    await expect(failingCtx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
      launchFailure,
    );
  }

  // Once the 30s cooldown has elapsed, a real launch attempt is made again.
  await vi.advanceTimersByTimeAsync(30_000);
  const retryCtx = createBrowserRouteContext({ getState: () => sharedState });
  await expect(retryCtx.forProfile("openclaw").ensureBrowserAvailable()).rejects.toThrow(
    launchFailure,
  );

  expect(launchSpy).toHaveBeenCalledTimes(4);
});
|
||||
|
||||
it("reuses a pre-existing loopback browser after an initial short probe miss", async () => {
|
||||
const { launchOpenClawChrome, stopOpenClawChrome, isChromeCdpReady, profile, state } =
|
||||
setupEnsureBrowserAvailableHarness();
|
||||
|
||||
@@ -35,6 +35,8 @@ export function createProfileResetOps({
|
||||
|
||||
const userDataDir = resolveOpenClawUserDataDir(profile.name);
|
||||
const profileState = getProfileState();
|
||||
profileState.managedLaunchFailure = undefined;
|
||||
profileState.ensureBrowserAvailable = null;
|
||||
const httpReachable = await isHttpReachable(300);
|
||||
if (httpReachable && !profileState.running) {
|
||||
// Port in use but not by us - kill it.
|
||||
|
||||
@@ -11,6 +11,13 @@ export type { BrowserTab };
|
||||
export type ProfileRuntimeState = {
|
||||
profile: ResolvedBrowserProfile;
|
||||
running: RunningChrome | null;
|
||||
ensureBrowserAvailable?: { key: string; promise: Promise<void> } | null;
|
||||
managedLaunchFailure?: {
|
||||
consecutiveFailures: number;
|
||||
lastFailureAt: number;
|
||||
cooldownUntil?: number;
|
||||
lastError: string;
|
||||
};
|
||||
/** Sticky tab selection when callers omit targetId (keeps snapshot+act consistent). */
|
||||
lastTargetId?: string | null;
|
||||
/** Stable, user-facing tab aliases scoped to this profile runtime. */
|
||||
|
||||
Reference in New Issue
Block a user