fix: stabilize Parallels update restart checks

This commit is contained in:
Peter Steinberger
2026-04-30 04:56:06 +01:00
parent d363565375
commit e648f38efc
4 changed files with 270 additions and 52 deletions

View File

@@ -39,12 +39,34 @@ stop_openclaw_gateway_processes() {
OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 /opt/homebrew/bin/openclaw gateway stop || true
pkill -f 'openclaw.*gateway' >/dev/null 2>&1 || true
}
start_openclaw_gateway() {
if /opt/homebrew/bin/openclaw gateway restart; then
return
fi
pkill -f 'openclaw.*gateway' >/dev/null 2>&1 || true
rm -f /tmp/openclaw-parallels-macos-gateway.log
nohup env OPENCLAW_HOME="$HOME" OPENCLAW_STATE_DIR="$HOME/.openclaw" OPENCLAW_CONFIG_PATH="$HOME/.openclaw/openclaw.json" ${input.auth.apiKeyEnv}=${shellQuote(
input.auth.apiKeyValue,
)} /opt/homebrew/bin/openclaw gateway run --bind loopback --port 18789 --force >/tmp/openclaw-parallels-macos-gateway.log 2>&1 </dev/null &
}
wait_for_gateway() {
deadline=$((SECONDS + 240))
while [ "$SECONDS" -lt "$deadline" ]; do
if /opt/homebrew/bin/openclaw gateway status --deep --require-rpc --timeout 15000; then
return
fi
sleep 2
done
cat /tmp/openclaw-parallels-macos-gateway.log >&2 || true
echo "gateway did not become ready after update" >&2
exit 1
}
scrub_future_plugin_entries
stop_openclaw_gateway_processes
OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 /opt/homebrew/bin/openclaw update --tag ${shellQuote(input.updateTarget)} --yes --json
${posixVersionCheck("/opt/homebrew/bin/openclaw", input.expectedNeedle)}
/opt/homebrew/bin/openclaw gateway restart
/opt/homebrew/bin/openclaw gateway status --deep --require-rpc
start_openclaw_gateway
wait_for_gateway
/opt/homebrew/bin/openclaw models set ${shellQuote(input.auth.modelId)}
/opt/homebrew/bin/openclaw config set agents.defaults.skipBootstrap true --strict-json
${posixAgentWorkspaceScript("Parallels npm update smoke test assistant.")}
@@ -122,12 +144,33 @@ stop_openclaw_gateway_processes() {
OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 openclaw gateway stop || true
pkill -f 'openclaw.*gateway' >/dev/null 2>&1 || true
}
start_openclaw_gateway() {
pkill -f "openclaw gateway run" >/dev/null 2>&1 || true
rm -f /tmp/openclaw-parallels-linux-gateway.log
setsid sh -lc ${shellQuote(
`exec env OPENCLAW_HOME=/root OPENCLAW_STATE_DIR=/root/.openclaw OPENCLAW_CONFIG_PATH=/root/.openclaw/openclaw.json OPENCLAW_DISABLE_BONJOUR=1 ${input.auth.apiKeyEnv}=${shellQuote(
input.auth.apiKeyValue,
)} openclaw gateway run --bind loopback --port 18789 --force >/tmp/openclaw-parallels-linux-gateway.log 2>&1`,
)} >/dev/null 2>&1 < /dev/null &
}
wait_for_gateway() {
deadline=$((SECONDS + 240))
while [ "$SECONDS" -lt "$deadline" ]; do
if openclaw gateway status --deep --require-rpc --timeout 15000; then
return
fi
sleep 2
done
cat /tmp/openclaw-parallels-linux-gateway.log >&2 || true
echo "gateway did not become ready after update" >&2
exit 1
}
scrub_future_plugin_entries
stop_openclaw_gateway_processes
OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 openclaw update --tag ${shellQuote(input.updateTarget)} --yes --json
${posixVersionCheck("openclaw", input.expectedNeedle)}
openclaw gateway restart
openclaw gateway status --deep --require-rpc
start_openclaw_gateway
wait_for_gateway
openclaw models set ${shellQuote(input.auth.modelId)}
openclaw config set agents.defaults.skipBootstrap true --strict-json
${posixAgentWorkspaceScript("Parallels npm update smoke test assistant.")}

View File

@@ -14,7 +14,6 @@ import {
resolveLatestVersion,
resolveProviderAuth,
run,
runStreaming,
say,
startHostServer,
writeJson,
@@ -45,6 +44,11 @@ interface Job {
promise: Promise<number>;
}
/**
 * Per-job context that `spawnUpdate` hands to each platform update callback,
 * so guest command output goes to that job's own log.
 */
interface UpdateJobContext {
/** Records a chunk of job output; byte chunks are decoded as UTF-8 text. */
append(chunk: string | Uint8Array): void;
/** Path of the file the job's collected log is written to. */
logPath: string;
}
interface NpmUpdateSummary {
packageSpec: string;
updateTarget: string;
@@ -296,15 +300,15 @@ class NpmUpdateSmoke {
const jobs: Job[] = [];
if (this.options.platforms.has("macos")) {
ensureVmRunning(macosVm);
jobs.push(this.spawnUpdate("macOS", "macos", () => this.runMacosUpdate()));
jobs.push(this.spawnUpdate("macOS", "macos", (ctx) => this.runMacosUpdate(ctx)));
}
if (this.options.platforms.has("windows")) {
ensureVmRunning(windowsVm);
jobs.push(this.spawnUpdate("Windows", "windows", () => this.runWindowsUpdate()));
jobs.push(this.spawnUpdate("Windows", "windows", (ctx) => this.runWindowsUpdate(ctx)));
}
if (this.options.platforms.has("linux")) {
ensureVmRunning(this.linuxVm);
jobs.push(this.spawnUpdate("Linux", "linux", () => this.runLinuxUpdate()));
jobs.push(this.spawnUpdate("Linux", "linux", (ctx) => this.runLinuxUpdate(ctx)));
}
await this.monitorJobs("update", jobs);
for (const job of jobs) {
@@ -319,7 +323,11 @@ class NpmUpdateSmoke {
}
}
private spawnUpdate(label: string, platform: Platform, fn: () => Promise<void> | void): Job {
private spawnUpdate(
label: string,
platform: Platform,
fn: (ctx: UpdateJobContext) => Promise<void> | void,
): Job {
const logPath = path.join(this.runDir, `${platform}-update.log`);
const job: Job = {
done: false,
@@ -328,8 +336,6 @@ class NpmUpdateSmoke {
promise: Promise.resolve(1),
};
job.promise = (async () => {
const originalStdout = process.stdout.write.bind(process.stdout);
const originalStderr = process.stderr.write.bind(process.stderr);
let log = "";
const append = (chunk: string | Uint8Array): boolean => {
const text = typeof chunk === "string" ? chunk : Buffer.from(chunk).toString("utf8");
@@ -340,11 +346,7 @@ class NpmUpdateSmoke {
append(`${label} update timed out after ${updateTimeoutSeconds}s\n`);
}, updateTimeoutSeconds * 1000);
try {
process.stdout.write = ((chunk: string | Uint8Array) =>
append(chunk)) as typeof process.stdout.write;
process.stderr.write = ((chunk: string | Uint8Array) =>
append(chunk)) as typeof process.stderr.write;
await fn();
await fn({ append, logPath });
await writeFile(logPath, log, "utf8");
return 0;
} catch (error) {
@@ -353,8 +355,6 @@ class NpmUpdateSmoke {
return 1;
} finally {
clearTimeout(timeout);
process.stdout.write = originalStdout;
process.stderr.write = originalStderr;
}
})().finally(() => {
job.done = true;
@@ -362,16 +362,16 @@ class NpmUpdateSmoke {
return job;
}
private runMacosUpdate(): void {
this.guestMacos(this.updateScript("macos"), updateTimeoutSeconds * 1000);
private async runMacosUpdate(ctx: UpdateJobContext): Promise<void> {
await this.guestMacos(this.updateScript("macos"), updateTimeoutSeconds * 1000, ctx);
}
private runWindowsUpdate(): Promise<void> {
return this.guestWindows(this.updateScript("windows"), updateTimeoutSeconds * 1000);
private runWindowsUpdate(ctx: UpdateJobContext): Promise<void> {
return this.guestWindows(this.updateScript("windows"), updateTimeoutSeconds * 1000, ctx);
}
private runLinuxUpdate(): void {
this.guestLinux(this.updateScript("linux"), updateTimeoutSeconds * 1000);
private async runLinuxUpdate(ctx: UpdateJobContext): Promise<void> {
await this.guestLinux(this.updateScript("linux"), updateTimeoutSeconds * 1000, ctx);
}
private updateScript(platform: Platform): string {
@@ -436,24 +436,112 @@ class NpmUpdateSmoke {
}
}
private guestMacos(script: string, timeoutMs: number): void {
run(
private async guestMacos(
script: string,
timeoutMs: number,
ctx: UpdateJobContext,
): Promise<void> {
const macosExecArgs = this.resolveMacosUpdateExecArgs(ctx);
const status = await this.runStreamingToJobLog(
"prlctl",
[
"exec",
macosVm,
"--current-user",
"/usr/bin/env",
"PATH=/opt/homebrew/bin:/opt/homebrew/opt/node/bin:/opt/homebrew/sbin:/usr/bin:/bin:/usr/sbin:/sbin",
"/bin/bash",
"-lc",
script,
],
{ timeoutMs },
["exec", macosVm, ...macosExecArgs, "/bin/bash", "-lc", script],
timeoutMs,
ctx,
);
if (status !== 0) {
throw new Error(`macOS update command failed with exit code ${status}`);
}
}
private async guestWindows(script: string, timeoutMs: number): Promise<void> {
/**
 * Builds the `prlctl exec` argument prefix that runs the macOS update script
 * as the desktop user.
 *
 * Probes `prlctl exec --current-user whoami` first. When the probe exits 0 and
 * its last output line looks like a user name, `--current-user` is used.
 * Otherwise the desktop user is resolved separately and the command is wrapped
 * in `sudo -H -u <user>` with HOME/USER/LOGNAME/PATH set explicitly.
 *
 * @param ctx Job context; receives the probe output or the fallback notice.
 * @returns Arguments to place between the VM name and the command to run.
 * @throws Error when no desktop user can be resolved for the fallback.
 */
private resolveMacosUpdateExecArgs(ctx: UpdateJobContext): string[] {
  const guestPath =
    "/opt/homebrew/bin:/opt/homebrew/opt/node/bin:/opt/homebrew/sbin:/usr/bin:/bin:/usr/sbin:/sbin";
  const pathAssignment = `PATH=${guestPath}`;

  const whoami = run("prlctl", ["exec", macosVm, "--current-user", "whoami"], {
    check: false,
    quiet: true,
    timeoutMs: 45_000,
  });
  // Only the last output line is treated as the user name.
  const reportedUser = whoami.stdout.trim().replaceAll("\r", "").split("\n").at(-1) ?? "";
  const currentUserWorks = whoami.status === 0 && /^[A-Za-z0-9._-]+$/.test(reportedUser);
  if (currentUserWorks) {
    return ["--current-user", "/usr/bin/env", pathAssignment];
  }

  const sudoUser = this.resolveMacosDesktopUser();
  if (sudoUser) {
    ctx.append(
      `desktop user unavailable via Parallels --current-user; using root sudo fallback for ${sudoUser}\n`,
    );
    const sudoHome = this.resolveMacosDesktopHome(sudoUser);
    return [
      "/usr/bin/sudo",
      "-H",
      "-u",
      sudoUser,
      "/usr/bin/env",
      `HOME=${sudoHome}`,
      `USER=${sudoUser}`,
      `LOGNAME=${sudoUser}`,
      pathAssignment,
    ];
  }

  // No usable user at all: keep the failed probe output in the job log before failing.
  ctx.append(whoami.stdout);
  ctx.append(whoami.stderr);
  throw new Error("macOS desktop user unavailable before update phase");
}
/**
 * Finds a macOS guest account to run the update as when `--current-user`
 * is not usable.
 *
 * First asks for the owner of `/dev/console`. If that is not a plausible
 * desktop user, it scans `dscl . -list /Users NFSHomeDirectory` and takes the
 * first account whose home directory is under `/Users/`.
 *
 * @returns The user name, or an empty string when none was found.
 */
private resolveMacosDesktopUser(): string {
  const consoleProbe = run(
    "prlctl",
    ["exec", macosVm, "/usr/bin/stat", "-f", "%Su", "/dev/console"],
    { check: false, quiet: true, timeoutMs: 30_000 },
  );
  const consoleOwner = consoleProbe.stdout.trim().replaceAll("\r", "").split("\n").at(-1) ?? "";
  // "root" and "loginwindow" are rejected as console owners.
  const ownerIsExcluded = consoleOwner === "root" || consoleOwner === "loginwindow";
  if (!ownerIsExcluded && /^[A-Za-z0-9._-]+$/.test(consoleOwner)) {
    return consoleOwner;
  }

  const listing = run(
    "prlctl",
    ["exec", macosVm, "/usr/bin/dscl", ".", "-list", "/Users", "NFSHomeDirectory"],
    { check: false, quiet: true, timeoutMs: 30_000 },
  ).stdout.replaceAll("\r", "");
  for (const entry of listing.split("\n")) {
    const [name, homeDir] = entry.trim().split(/\s+/);
    if (!name || !homeDir?.startsWith("/Users/")) {
      continue;
    }
    // NOTE(review): any other account whose home is under /Users/ (for example
    // a guest account) would be accepted here; confirm that is intended.
    if (name.startsWith("_") || name === "Shared" || name === ".localized") {
      continue;
    }
    return name;
  }
  return "";
}
/**
 * Looks up a macOS guest user's home directory with
 * `dscl . -read /Users/<user> NFSHomeDirectory`.
 *
 * @param user Guest account name.
 * @returns The first whitespace-free token after `NFSHomeDirectory:`, or
 *   `/Users/<user>` when the output has no such entry.
 */
private resolveMacosDesktopHome(user: string): string {
  const recordPath = `/Users/${user}`;
  const lookup = run(
    "prlctl",
    ["exec", macosVm, "/usr/bin/dscl", ".", "-read", recordPath, "NFSHomeDirectory"],
    { check: false, quiet: true, timeoutMs: 30_000 },
  );
  const cleaned = lookup.stdout.replaceAll("\r", "");
  const homeDir = cleaned.match(/NFSHomeDirectory:\s*(\S+)/)?.[1];
  return homeDir ?? `/Users/${user}`;
}
private async guestWindows(
script: string,
timeoutMs: number,
ctx: UpdateJobContext,
): Promise<void> {
const fileBase = `openclaw-parallels-npm-update-windows-${process.pid}-${Date.now()}`;
const pathsScript = `$base = Join-Path $env:TEMP '${fileBase}'
$scriptPath = "$base.ps1"
@@ -474,7 +562,7 @@ ${script}
} finally {
Set-Content -Path $donePath -Value 'done' -Encoding UTF8
}`;
run(
const writeScript = run(
"prlctl",
[
"exec",
@@ -490,11 +578,21 @@ Remove-Item -Path $scriptPath, $logPath, $donePath, $exitPath -Force -ErrorActio
[System.IO.File]::WriteAllText($scriptPath, [Console]::In.ReadToEnd(), [System.Text.UTF8Encoding]::new($false))
if (!(Test-Path $scriptPath)) { throw "background update script was not written" }`),
],
{ input: payload, timeoutMs: Math.min(timeoutMs, 120_000) },
{ check: false, input: payload, timeoutMs: Math.min(timeoutMs, 120_000) },
);
if (writeScript.stdout) {
ctx.append(writeScript.stdout);
}
if (writeScript.stderr) {
ctx.append(writeScript.stderr);
}
if (writeScript.status !== 0) {
throw new Error(
`Windows update background script write failed with exit code ${writeScript.status}`,
);
}
const launchLogPath = path.join(this.runDir, `${fileBase}-launch.log`);
const launchStatus = await runStreaming(
const launchStatus = await this.runStreamingToJobLog(
"prlctl",
[
"exec",
@@ -506,12 +604,9 @@ if (!(Test-Path $scriptPath)) { throw "background update script was not written"
"/c",
`start "" /min powershell.exe -NoProfile -WindowStyle Hidden -ExecutionPolicy Bypass -File "%TEMP%\\${fileBase}.ps1"`,
],
{ logPath: launchLogPath, quiet: true, timeoutMs: 20_000 },
20_000,
ctx,
);
const launchLog = await readFile(launchLogPath, "utf8").catch(() => "");
if (launchLog) {
process.stdout.write(launchLog);
}
if (launchStatus !== 0 && launchStatus !== 124) {
throw new Error(`Windows update background launch failed with exit code ${launchStatus}`);
}
@@ -550,10 +645,10 @@ if (Test-Path $donePath) {
{ check: false, timeoutMs: Math.min(30_000, Math.max(1_000, deadline - Date.now())) },
);
if (poll.stdout) {
process.stdout.write(poll.stdout);
ctx.append(poll.stdout);
}
if (poll.stderr) {
process.stderr.write(poll.stderr);
ctx.append(poll.stderr);
}
const offsetMatch = poll.stdout.match(/__OPENCLAW_LOG_OFFSET__:(\d+)/);
if (offsetMatch) {
@@ -588,9 +683,54 @@ Remove-Item -Path $scriptPath, $logPath, $donePath, $exitPath -Force -ErrorActio
throw new Error(`Windows update timed out after ${updateTimeoutSeconds}s`);
}
private guestLinux(script: string, timeoutMs: number): void {
run("prlctl", ["exec", this.linuxVm, "/usr/bin/env", "HOME=/root", "bash", "-lc", script], {
/**
 * Runs the update script in the Linux guest through `prlctl exec`, streaming
 * its output into the job log.
 *
 * @param script Shell script passed to `bash -lc`.
 * @param timeoutMs Time allowed for the guest command.
 * @param ctx Job context that receives the command output.
 * @throws Error when the command finishes with a non-zero status.
 */
private async guestLinux(
  script: string,
  timeoutMs: number,
  ctx: UpdateJobContext,
): Promise<void> {
  const execArgs = ["exec", this.linuxVm, "/usr/bin/env", "HOME=/root", "bash", "-lc", script];
  const exitCode = await this.runStreamingToJobLog("prlctl", execArgs, timeoutMs, ctx);
  if (exitCode === 0) {
    return;
  }
  throw new Error(`Linux update command failed with exit code ${exitCode}`);
}
/**
 * Spawns a command and forwards its stdout and stderr chunks to the job log
 * as they arrive.
 *
 * After `timeoutMs` the child is sent SIGTERM, followed by SIGKILL two seconds
 * later, and the call resolves with 124.
 *
 * @param command Executable to spawn.
 * @param args Arguments for the executable.
 * @param timeoutMs Time allowed before the child is terminated.
 * @param ctx Job context whose `append` receives every output chunk.
 * @returns The exit code; 124 on timeout; 128 when the child ended by signal
 *   without an exit code; 1 when neither a code nor a signal was reported.
 * @throws Rejects with the error emitted by the child process object.
 */
private async runStreamingToJobLog(
  command: string,
  args: string[],
  timeoutMs: number,
  ctx: UpdateJobContext,
): Promise<number> {
  return await new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      cwd: repoRoot,
      env: process.env,
      stdio: ["ignore", "pipe", "pipe"],
    });
    child.stdout.on("data", (chunk: Buffer) => ctx.append(chunk));
    child.stderr.on("data", (chunk: Buffer) => ctx.append(chunk));
    let timedOut = false;
    let escalation: ReturnType<typeof setTimeout> | undefined;
    const timer = setTimeout(() => {
      timedOut = true;
      child.kill("SIGTERM");
      // Escalate if SIGTERM is ignored; unref so this never holds the process open.
      escalation = setTimeout(() => child.kill("SIGKILL"), 2_000);
      escalation.unref();
    }, timeoutMs);
    child.on("error", (error) => {
      // "close" is not guaranteed to follow "error", so disarm the timeout here.
      // Otherwise it keeps the event loop alive after the promise has rejected.
      // The SIGKILL escalation stays armed in case the error came from a failed kill.
      clearTimeout(timer);
      reject(error);
    });
    child.on("close", (code, signal) => {
      clearTimeout(timer);
      // The child is gone, so a pending SIGKILL has nothing left to do.
      clearTimeout(escalation);
      if (timedOut) {
        resolve(124);
        return;
      }
      resolve(code ?? (signal ? 128 : 1));
    });
  });
}

View File

@@ -351,6 +351,35 @@ describe("inspectGatewayRestart", () => {
expect(snapshot.versionMismatch).toBeUndefined();
});
it("accepts matching-version restart liveness when the probe lacks operator scope", async () => {
  const version = "2026.4.24";
  const gatewayPid = 8000;

  // The probe connected and saw the server version, but failed for lack of operator scope.
  probeGateway.mockResolvedValue({
    ok: false,
    close: null,
    connectLatencyMs: 12,
    error: "missing scope: operator.read",
    auth: { capability: "connected_no_operator_scope" },
    server: { version, connId: "new" },
  });

  // The running service and the port listener share the same pid.
  const snapshot = await inspectGatewayRestartWithSnapshot({
    runtime: { status: "running", pid: gatewayPid },
    expectedVersion: version,
    portUsage: {
      port: 18789,
      status: "busy",
      listeners: [{ pid: gatewayPid, commandLine: "openclaw-gateway" }],
      hints: [],
    },
  });

  expect(snapshot).toMatchObject({
    healthy: true,
    gatewayVersion: version,
    expectedVersion: version,
  });
  expect(snapshot.versionMismatch).toBeUndefined();
});
it("stops waiting once the restarted gateway reports the wrong version", async () => {
probeGateway.mockResolvedValue({
ok: true,

View File

@@ -237,8 +237,14 @@ async function confirmGatewayReachable(params: {
timeoutMs: 3_000,
includeDetails: params.includeHealthDetails === true,
});
const reachedGateway =
probe.ok ||
looksLikeAuthClose(probe.close?.code, probe.close?.reason) ||
(probe.connectLatencyMs != null &&
probe.server?.version != null &&
probe.auth.capability === "connected_no_operator_scope");
return {
reachable: probe.ok || looksLikeAuthClose(probe.close?.code, probe.close?.reason),
reachable: reachedGateway,
gatewayVersion: probe.server?.version ?? null,
activatedPluginErrors: readActivatedPluginErrors(probe.health),
channelProbeErrors: readChannelProbeErrors(probe.health),