fix(browser): derive Chrome launch readiness from a single CDP diagnostic (#82904) (#82986)

* fix(browser): derive Chrome launch readiness from a single CDP diagnostic (#82904)

The pre-fix launch path used `isChromeReachable` (a lightweight HTTP
`/json/version` probe) to decide failure, then called the stronger
`diagnoseChromeCdp` only to format the thrown error. On macOS cold
starts where the HTTP probe transiently fails *between* the polling
loop and the diagnostic call, the runtime would throw

    "Failed to start Chrome CDP on port ... { ok: true, wsUrl: ... }"

— a self-contradicting error containing a successful diagnostic
result. Per #82904 this is the actual user-visible bug.

Capture `diagnoseChromeCdp` ONCE after the polling loop and use it for
both the decision and the error text. The diagnostic helper already
includes the lightweight reachability check and adds a websocket
`Browser.getVersion` health command, so it is strictly stronger than
the HTTP probe; if `diagnoseChromeCdp` returns ok the launch
genuinely succeeded.

The existing `withMockChromeCdpServer` success test in
chrome.internal.test.ts still exercises this code path end-to-end
(real HTTP server + real websocket handshake), so the regression-safety
case is covered. The asymmetric `probe-fails-but-diagnostic-succeeds`
scenario is hard to mock without restructuring the existing test
harness; this commit ships the fix and relies on the upstream
ClawSweeper review criteria (manual managed-Chrome cold-start proof)
plus the standalone real-behavior probe in the PR body.

* fix(browser): import ChromeCdpDiagnostic type from chrome.diagnostics

The annotation `let finalDiagnostic: ChromeCdpDiagnostic | null` referenced
a type that was only re-exported (not imported) inside chrome.ts, causing
oxlint/tsc to read it as the implicit `error` type and fail check-lint,
check-prod-types, check-test-types, etc. Add the type to the existing
chrome.diagnostics.js import block.

* fix(browser): preserve Chrome launch diagnostic fallback

* test(browser): satisfy launch diagnostic lint

* fix(browser): keep Chrome launch readiness scoped

* test(browser): answer CDP launch mock probe

---------

Co-authored-by: hclsys <hclsys@users.noreply.github.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
hcl
2026-05-17 18:11:15 +08:00
committed by GitHub
parent bf51933358
commit 42435d110b
3 changed files with 233 additions and 54 deletions

View File

@@ -77,6 +77,7 @@ Docs: https://docs.openclaw.ai
- Process/diagnostics: report active lane blockers in lane wait warnings so `queueAhead=0` no longer hides commands waiting behind active work. Fixes #82791. (#82792) Thanks @galiniliev.
- Process/diagnostics: stop counting the active processing turn as queued backlog in liveness warnings so transient max-only event-loop spikes do not surface as gateway warnings.
- Agents/replies: classify provider conversation-state rejections and return a clear message-channel error instead of auto-resetting or falling back to a generic runner failure. (#82616) Thanks @dutifulbob.
- Browser plugin: trust managed Chrome CDP diagnostics when launch HTTP probes race cold-start readiness, avoiding false startup failures. Fixes #82904. (#82986) Thanks @kmanan and @hclsys.
## 2026.5.17

View File

@@ -153,7 +153,29 @@ async function withMockChromeCdpServer(params: {
wss.emit("connection", ws, req);
});
});
params.onConnection?.(wss);
if (params.onConnection) {
params.onConnection(wss);
} else {
wss.on("connection", (ws) => {
ws.on("message", (raw) => {
const message = JSON.parse(rawDataToString(raw)) as {
id?: unknown;
method?: unknown;
};
if (message.method === "Browser.getVersion" && typeof message.id === "number") {
ws.send(
JSON.stringify({
id: message.id,
result: {
product: "Chrome/Mock",
userAgent: "OpenClawTest",
},
}),
);
}
});
});
}
await new Promise<void>((resolve, reject) => {
server.listen(0, "127.0.0.1", () => resolve());
server.once("error", reject);
@@ -484,6 +506,119 @@ describe("chrome.ts internal", () => {
});
});
it("accepts a ready CDP diagnostic after the launch HTTP probe expires", async () => {
vi.spyOn(fs, "existsSync").mockImplementation((p) => {
const s = String(p);
if (
s.includes("Google Chrome") ||
s.includes("google-chrome") ||
s.includes("/usr/bin/chromium")
) {
return true;
}
if (s.endsWith("Local State") || s.endsWith("Preferences")) {
return true;
}
return false;
});
spawnMock.mockImplementation(() => makeFakeProc());
const originalFetch = globalThis.fetch;
let now = 1_000_000;
vi.spyOn(Date, "now").mockImplementation(() => now);
let discoveryCalls = 0;
vi.stubGlobal(
"fetch",
vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
const url =
typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (url.includes("/json/version")) {
discoveryCalls += 1;
if (discoveryCalls === 1) {
now += 2;
throw new Error("ECONNREFUSED");
}
}
return await originalFetch(input, init);
}),
);
await withMockChromeCdpServer({
wsPath: "/devtools/browser/COLD_START",
run: async (baseUrl) => {
const port = new URL(baseUrl).port;
const profile = makeProfile(Number(port));
const running = await launchOpenClawChrome(
makeResolved({ localLaunchTimeoutMs: 1 }),
profile,
);
expect(running.pid).toBe(4242);
expect(discoveryCalls).toBeGreaterThan(1);
running.proc.kill?.("SIGTERM");
},
});
});
it("keeps the launched process when fallback diagnostic sees HTTP before WS readiness", async () => {
vi.spyOn(fs, "existsSync").mockImplementation((p) => {
const s = String(p);
if (
s.includes("Google Chrome") ||
s.includes("google-chrome") ||
s.includes("/usr/bin/chromium")
) {
return true;
}
if (s.endsWith("Local State") || s.endsWith("Preferences")) {
return true;
}
return false;
});
const fakeProc = makeFakeProc();
spawnMock.mockImplementation(() => fakeProc);
const originalFetch = globalThis.fetch;
let now = 1_000_000;
vi.spyOn(Date, "now").mockImplementation(() => now);
let discoveryCalls = 0;
vi.stubGlobal(
"fetch",
vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
const url =
typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (url.includes("/json/version")) {
discoveryCalls += 1;
if (discoveryCalls === 1) {
now += 2;
throw new Error("ECONNREFUSED");
}
}
return await originalFetch(input, init);
}),
);
await withMockChromeCdpServer({
wsPath: "/devtools/browser/WS_WARMING",
onConnection: (wss) => {
wss.on("connection", () => {
// HTTP discovery is enough for launch; caller owns WS readiness.
});
},
run: async (baseUrl) => {
const port = new URL(baseUrl).port;
const profile = makeProfile(Number(port));
const running = await launchOpenClawChrome(
makeResolved({ localLaunchTimeoutMs: 1 }),
profile,
);
expect(running.pid).toBe(4242);
expect(discoveryCalls).toBeGreaterThan(1);
expect(fakeProc.kill).not.toHaveBeenCalledWith("SIGKILL");
running.proc.kill?.("SIGTERM");
},
});
});
it("uses profile executablePath over global executablePath when launching", async () => {
const originalPlatform = process.platform;
vi.spyOn(fs, "existsSync").mockImplementation((p) => {
@@ -528,16 +663,14 @@ describe("chrome.ts internal", () => {
);
vi.stubEnv("OPENCLAW_CONFIG_PATH", configPath);
let cdpReachable = false;
const originalFetch = globalThis.fetch;
vi.stubGlobal(
"fetch",
vi.fn(async () => {
vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
if (!cdpReachable) {
throw new Error("ECONNREFUSED");
}
return {
ok: true,
json: async () => ({ webSocketDebuggerUrl: "ws://127.0.0.1/devtools" }),
} as unknown as Response;
return await originalFetch(input, init);
}),
);
vi.spyOn(fs, "existsSync").mockImplementation((p) => {
@@ -567,27 +700,33 @@ describe("chrome.ts internal", () => {
return secondProc;
});
const profile = { ...makeProfile(18888), executablePath: "/tmp/profile-chrome" };
const userDataDir = resolveOpenClawUserDataDir(profile.name);
await fsp.mkdir(userDataDir, { recursive: true });
await fsp.writeFile(path.join(userDataDir, "SingletonCookie"), "cookie");
await fsp.writeFile(path.join(userDataDir, "SingletonSocket"), "socket");
await fsp.symlink("remote-host-535", path.join(userDataDir, "SingletonLock"));
await withMockChromeCdpServer({
wsPath: "/devtools/browser/SINGLETON_RETRY",
run: async (baseUrl) => {
const port = Number(new URL(baseUrl).port);
const profile = { ...makeProfile(port), executablePath: "/tmp/profile-chrome" };
const userDataDir = resolveOpenClawUserDataDir(profile.name);
await fsp.mkdir(userDataDir, { recursive: true });
await fsp.writeFile(path.join(userDataDir, "SingletonCookie"), "cookie");
await fsp.writeFile(path.join(userDataDir, "SingletonSocket"), "socket");
await fsp.symlink("remote-host-535", path.join(userDataDir, "SingletonLock"));
try {
const running = await launchOpenClawChrome(
makeResolved({ localLaunchTimeoutMs: 20 }),
profile,
);
expect(running.proc).toBe(secondProc);
expect(firstProc.kill).toHaveBeenCalledWith("SIGKILL");
expect(spawnCalls).toBe(2);
expect(fs.existsSync(path.join(userDataDir, "SingletonLock"))).toBe(false);
expect(fs.existsSync(path.join(userDataDir, "SingletonSocket"))).toBe(false);
running.proc.kill?.("SIGTERM");
} finally {
await fsp.rm(userDataDir, { recursive: true, force: true });
}
try {
const running = await launchOpenClawChrome(
makeResolved({ localLaunchTimeoutMs: 20 }),
profile,
);
expect(running.proc).toBe(secondProc);
expect(firstProc.kill).toHaveBeenCalledWith("SIGKILL");
expect(spawnCalls).toBe(2);
expect(fs.existsSync(path.join(userDataDir, "SingletonLock"))).toBe(false);
expect(fs.existsSync(path.join(userDataDir, "SingletonSocket"))).toBe(false);
running.proc.kill?.("SIGTERM");
} finally {
await fsp.rm(userDataDir, { recursive: true, force: true });
}
},
});
});
it("throws with stderr hint + sandbox hint when CDP never becomes reachable", async () => {

View File

@@ -33,6 +33,7 @@ import {
} from "./cdp.helpers.js";
import { normalizeCdpWsUrl } from "./cdp.js";
import {
type ChromeCdpDiagnostic,
diagnoseChromeCdp,
formatChromeCdpDiagnostic,
type ChromeVersion,
@@ -72,6 +73,12 @@ const CHROME_SINGLETON_LOCK_PATHS = [
] as const;
const CHROME_SINGLETON_IN_USE_PATTERN = /profile appears to be in use by another chromium process/i;
const CHROME_MISSING_DISPLAY_PATTERN = /missing x server|\$DISPLAY/i;
const CHROME_HTTP_DISCOVERY_FAILURE_CODES = new Set([
"ssrf_blocked",
"http_unreachable",
"http_status_failed",
"invalid_json",
]);
export type { BrowserExecutable } from "./chrome.executables.js";
export {
@@ -100,6 +107,16 @@ function exists(filePath: string) {
}
}
function diagnosticShowsChromeHttpDiscovery(diagnostic: ChromeCdpDiagnostic | null): boolean {
if (!diagnostic) {
return false;
}
if (diagnostic.ok) {
return true;
}
return !CHROME_HTTP_DISCOVERY_FAILURE_CODES.has(diagnostic.code);
}
function processExists(pid: number): boolean {
if (!Number.isInteger(pid) || pid <= 0) {
return false;
@@ -531,43 +548,65 @@ export async function launchOpenClawChrome(
try {
const readyDeadline =
Date.now() + (resolved.localLaunchTimeoutMs ?? CHROME_LAUNCH_READY_WINDOW_MS);
let launchHttpReachable = false;
// Full CDP WebSocket readiness is handled by the caller's
// waitForCdpReadyAfterLaunch() budget; launch only owns process discovery.
while (Date.now() < readyDeadline) {
if (await isChromeReachable(profile.cdpUrl)) {
launchHttpReachable = true;
break;
}
await new Promise((r) => setTimeout(r, CHROME_LAUNCH_READY_POLL_MS));
}
if (!(await isChromeReachable(profile.cdpUrl))) {
const diagnosticText = await diagnoseChromeCdp(profile.cdpUrl)
.then(formatChromeCdpDiagnostic)
.catch((err) => `CDP diagnostic failed: ${safeChromeCdpErrorMessage(err)}.`);
const stderrOutput =
normalizeOptionalString(Buffer.concat(stderrChunks).toString("utf8")) ?? "";
const redactedStderrOutput = redactToolPayloadText(stderrOutput);
if (
allowSingletonRecovery &&
CHROME_SINGLETON_IN_USE_PATTERN.test(stderrOutput) &&
clearStaleChromeSingletonLocks(userDataDir)
) {
log.warn(
`Removed stale Chromium Singleton* locks for profile "${profile.name}" and retrying launch.`,
);
await terminateChromeForRetry(proc, userDataDir);
return await launchOnceAndWait(false);
}
const stderrHint = redactedStderrOutput
? `\nChrome stderr:\n${redactedStderrOutput.slice(0, CHROME_STDERR_HINT_MAX_CHARS)}`
: "";
const launchHints = chromeLaunchHints({ stderrOutput, resolved, profile, launchOptions });
if (!launchHttpReachable) {
let finalDiagnostic: ChromeCdpDiagnostic | null = null;
let diagnosticErrorText: string | null = null;
try {
proc.kill("SIGKILL");
} catch {
// ignore
finalDiagnostic = await diagnoseChromeCdp(
profile.cdpUrl,
CHROME_REACHABILITY_TIMEOUT_MS,
CHROME_WS_READY_TIMEOUT_MS,
);
} catch (err) {
diagnosticErrorText = `CDP diagnostic failed: ${safeChromeCdpErrorMessage(err)}.`;
}
if (diagnosticShowsChromeHttpDiscovery(finalDiagnostic)) {
launchHttpReachable = true;
}
const diagnosticText = finalDiagnostic
? formatChromeCdpDiagnostic(finalDiagnostic)
: (diagnosticErrorText ?? "CDP diagnostic failed.");
if (launchHttpReachable) {
log.debug(diagnosticText);
} else {
const stderrOutput =
normalizeOptionalString(Buffer.concat(stderrChunks).toString("utf8")) ?? "";
const redactedStderrOutput = redactToolPayloadText(stderrOutput);
if (
allowSingletonRecovery &&
CHROME_SINGLETON_IN_USE_PATTERN.test(stderrOutput) &&
clearStaleChromeSingletonLocks(userDataDir)
) {
log.warn(
`Removed stale Chromium Singleton* locks for profile "${profile.name}" and retrying launch.`,
);
await terminateChromeForRetry(proc, userDataDir);
return await launchOnceAndWait(false);
}
const stderrHint = redactedStderrOutput
? `\nChrome stderr:\n${redactedStderrOutput.slice(0, CHROME_STDERR_HINT_MAX_CHARS)}`
: "";
const launchHints = chromeLaunchHints({ stderrOutput, resolved, profile, launchOptions });
try {
proc.kill("SIGKILL");
} catch {
// ignore
}
throw new Error(
`Failed to start Chrome CDP on port ${profile.cdpPort} for profile "${profile.name}". ${diagnosticText}${launchHints}${stderrHint}`,
);
}
throw new Error(
`Failed to start Chrome CDP on port ${profile.cdpPort} for profile "${profile.name}". ${diagnosticText}${launchHints}${stderrHint}`,
);
}
const pid = proc.pid ?? -1;