test: harden parallels smoke harness

This commit is contained in:
Peter Steinberger
2026-04-22 21:59:59 +01:00
parent 054fda206e
commit 2e38e09b04
9 changed files with 598 additions and 236 deletions

View File

@@ -22,16 +22,17 @@ Use this skill for Parallels guest workflows and smoke interpretation. Do not lo
- Windows: `90m`
- aggregate npm-update wrapper: `150m`
If a lane hits the cap, stop there, inspect the newest `/tmp/openclaw-parallels-*` run directory and phase log, then fix or rerun the smallest affected lane. Do not keep waiting on a capped lane.
- Actual OpenClaw npm install/update phases are a stricter budget than whole lanes: install phases should finish within 7 minutes, and update phases should finish within 5 minutes. If a phase named `install-main`, `install-latest`, `install-baseline`, or `install-baseline-package` exceeds 420s, or a phase named `update-dev` / same-guest `openclaw update` exceeds 300s, treat it as a failure/harness bug and start diagnosis from that phase log. Do not wait for a longer lane cap.
- Actual OpenClaw npm install/update phases are a stricter signal than whole-lane caps: install phases should normally finish within 7 minutes, and update phases should normally show meaningful progress within 5 minutes. If a phase named `install-main`, `install-latest`, `install-baseline`, or `install-baseline-package` exceeds 420s, or a phase named `update-dev` / same-guest `openclaw update` exceeds 300s without new markers, start diagnosis from that phase log and guest process state. Current Windows update phases can still pass after roughly 10-15 minutes because `doctor --fix` may install bundled plugin runtime deps; keep the script hard cap near 20 minutes unless the log is truly stale.
- For a full OS matrix, prefer running independent guest-family lanes in parallel when host capacity allows:
- `timeout --foreground 75m pnpm test:parallels:macos -- --json`
- `timeout --foreground 90m pnpm test:parallels:windows -- --json`
- `timeout --foreground 75m pnpm test:parallels:linux -- --json`
Keep each lane in its own shell/session and track the run directory for each one.
Keep each lane in its own shell/session and track the run directory for each one. Before starting the matrix, run any required host build/package gate to completion. When current-main tgz packaging is needed, the smoke scripts hold a shared package lock through `pnpm build`, inventory/staging, and `npm pack`; if that lock is missing or broken, serialize the matrix instead of accepting concurrent `dist` mutation.
- Do not run multiple smoke lanes against the same guest family at once. Tahoe lanes share the host HTTP port, and Windows/Linux lanes can collide on snapshot restore/start state if two jobs touch the same VM concurrently.
- Do not run the aggregate `pnpm test:parallels:npm-update` wrapper in parallel with individual macOS/Windows/Linux smoke lanes; it touches the same guest families and snapshots.
- Do not start Parallels lanes while any host command may rebuild, clean, or restage `dist` (`pnpm build`, `pnpm ui:build`, `pnpm release:check`, `pnpm test:install:smoke`, npm pack/install smoke, or Docker lanes that run package/build prep). Run the build/package gates first, let them finish, then start the VM matrix. Concurrent `dist` mutation can make host `npm pack` fail with missing files and wastes a full VM cycle.
- Do not start Parallels lanes while any unrelated host command may rebuild, clean, or restage `dist` (`pnpm build`, `pnpm ui:build`, `pnpm release:check`, `pnpm test:install:smoke`, npm pack/install smoke, or Docker lanes that run package/build prep). Run unrelated build/package gates first, let them finish, then start the VM matrix. Concurrent `dist` mutation can make host `npm pack` fail with missing files and wastes a full VM cycle.
- While running or optimizing the matrix, record wall-clock duration per lane and the slowest phase from `/tmp/openclaw-parallels-*` logs. Use that timing before changing smoke order, timeouts, or helper behavior.
- If a host build changes tracked generated files such as `src/canvas-host/a2ui/.bundle.hash`, stop before spending VM time. Commit the generated artifact separately or fix the generator drift, then rerun the smallest affected lane.
- If `main` is moving under active multi-agent work, prefer a detached worktree pinned to one commit for long Parallels suites. The smoke scripts now verify the packed tgz commit instead of live `git rev-parse HEAD`, but a pinned worktree still avoids noisy rebuild/version drift during reruns.
- For `openclaw update --channel dev` lanes, remember the guest clones GitHub `main`, not your local worktree. If a local fix exists but the rerun still fails inside the cloned dev checkout, do not treat that as disproof of the fix until the branch has been pushed.
- For `prlctl exec`, pass the VM name before `--current-user` (`prlctl exec "$VM" --current-user ...`), not the other way around.

View File

@@ -0,0 +1,158 @@
#!/usr/bin/env bash
# Print the commit recorded in dist/build-info.json (resolved against the
# current working directory), or an empty line when it cannot be determined.
#
# A missing, unreadable, truncated, or non-object build-info file, and a
# missing or non-string "commit" field, all yield an empty line with exit
# status 0. Callers compare the output against `git rev-parse HEAD`, so an
# unknown commit reads as "stale build" instead of aborting a caller that
# captures the output while running under `set -e`.
parallels_package_current_build_commit() {
  python3 - <<'PY'
import json
import pathlib

path = pathlib.Path("dist/build-info.json")
commit = ""
try:
    info = json.loads(path.read_text(encoding="utf-8"))
except (OSError, ValueError):
    # OSError: file missing or unreadable. ValueError: invalid JSON or invalid
    # UTF-8, e.g. a build-info file that is only partially written.
    info = None
if isinstance(info, dict):
    value = info.get("commit")
    if isinstance(value, str):
        commit = value
print(commit)
PY
}
# Acquire the shared build lock, blocking until it is available.
#
# The lock is a directory created atomically with mkdir; the holder records its
# shell pid in "$lock_dir/pid".
#
#   $1  lock directory path
#   $2  optional: seconds to tolerate a lock directory that has no usable pid
#       before treating it as orphaned (default 10)
#
# Stale-lock handling:
#   - recorded pid no longer alive  -> lock removed immediately and retried
#   - no pid file / empty pid file  -> lock removed once it has stayed that way
#     for the grace period. The holder writes its pid right after mkdir, so a
#     long-lived pid-less lock means the holder died in between.
#
# Returns 1 without waiting when the lock's parent directory does not exist,
# because mkdir could never succeed in that case.
parallels_package_acquire_build_lock() {
  local lock_dir="$1"
  local orphan_grace_s="${2:-10}"
  local owner_pid=""
  local orphan_waits=0
  if [[ ! -d "$(dirname -- "$lock_dir")" ]]; then
    printf 'error: Parallels build lock parent directory is missing: %s\n' "$lock_dir" >&2
    return 1
  fi
  while ! mkdir "$lock_dir" 2>/dev/null; do
    owner_pid=""
    if [[ -f "$lock_dir/pid" ]]; then
      owner_pid="$(cat "$lock_dir/pid" 2>/dev/null || true)"
    fi
    if [[ -n "$owner_pid" ]]; then
      orphan_waits=0
      if ! kill -0 "$owner_pid" >/dev/null 2>&1; then
        printf 'warn: Removing stale Parallels build lock\n' >&2
        rm -rf "$lock_dir"
        continue
      fi
    elif [[ -d "$lock_dir" ]]; then
      # Lock exists but nobody has claimed it with a pid (yet).
      if (( orphan_waits >= orphan_grace_s )); then
        printf 'warn: Removing Parallels build lock without an owner pid\n' >&2
        rm -rf "$lock_dir"
        orphan_waits=0
        continue
      fi
      orphan_waits=$((orphan_waits + 1))
    fi
    sleep 1
  done
  printf '%s\n' "$$" >"$lock_dir/pid"
}
# Release the build lock by deleting its directory, pid file included.
#   $1  lock directory path
# Releasing a lock that is already gone is a successful no-op.
parallels_package_release_build_lock() {
  local lock_dir="$1"
  [[ -d "$lock_dir" ]] || return 0
  rm -rf "$lock_dir"
}
# Run a command while holding the build lock and return the command's status.
#
#   $1    lock directory path
#   $2..  command and arguments to run under the lock
#
# errexit is switched off around the command so a failure still reaches the
# release step. Afterwards errexit is restored to whatever the caller had,
# rather than being forced on. If the lock cannot be acquired the command is
# not run and the acquisition status is returned.
parallels_package_run_with_build_lock() {
  local lock_dir="$1"
  local rc
  local restore_errexit=0
  shift
  parallels_package_acquire_build_lock "$lock_dir" || return
  if [[ $- == *e* ]]; then
    restore_errexit=1
  fi
  set +e
  "$@"
  rc=$?
  if (( restore_errexit )); then
    set -e
  fi
  parallels_package_release_build_lock "$lock_dir"
  return "$rc"
}
# Prepare packaging metadata from the repository root (the current directory):
# first run the npm-update compat sidecar script, then invoke
# writePackageDistInventory for this checkout through an inline ES module.
parallels_package_write_dist_inventory() {
  local inventory_snippet='import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());'
  node --import tsx scripts/write-npm-update-compat-sidecars.ts
  node --import tsx --input-type=module --eval "$inventory_snippet"
}
# Fail when the build left the tracked generated file
# src/canvas-host/a2ui/.bundle.hash modified according to `git status`.
# Returns 0 when git reports nothing for that path (git errors are silenced and
# count as "no drift"); otherwise prints the porcelain status to stderr and
# returns 1.
parallels_package_assert_no_generated_drift() {
  local changed
  changed="$(git status --porcelain -- src/canvas-host/a2ui/.bundle.hash 2>/dev/null || true)"
  if [[ -n "$changed" ]]; then
    printf 'error: generated file drift after build; commit or revert before Parallels packaging:\n%s\n' "$changed" >&2
    return 1
  fi
  return 0
}
# Print a one-line progress summary for a phase log.
#
#   $1  python interpreter to run
#   $2  path of the log file
#
# Preference order, always looking from the end of the log backwards:
#   1. the newest "==> " marker line, printed without the marker
#   2. the newest line starting with "warn:" or "error:"
#   3. the last non-blank line, cut to 240 characters
# A missing or blank log prints an empty line.
parallels_log_progress_extract() {
  local python_bin="$1"
  local log_path="$2"
  "$python_bin" - "$log_path" <<'PY'
import pathlib
import sys

log_file = pathlib.Path(sys.argv[1])
entries = []
if log_file.exists():
    raw = log_file.read_text(encoding="utf-8", errors="replace")
    entries = [piece.strip() for piece in raw.splitlines() if piece.strip()]

newest_first = entries[::-1]
marker = next((item for item in newest_first if item.startswith("==> ")), None)
notice = next(
    (item for item in newest_first if item.startswith("warn:") or item.startswith("error:")),
    None,
)

if marker is not None:
    print(marker[4:].strip())
elif notice is not None:
    print(notice)
elif entries:
    print(entries[-1][:240])
else:
    print("")
PY
}
# Report whether a job is still running and, optionally, still our child.
#
#   $1  pid to check
#   $2  optional expected parent pid; when empty, only liveness is checked
#
# Returns 0 when the pid is alive (and its parent pid equals $2 when given),
# 1 otherwise. The parent check guards against a recycled pid being mistaken
# for the original job.
parallels_child_job_running() {
  local target="$1"
  local owner="${2:-}"
  local parent_pid
  kill -0 "$target" >/dev/null 2>&1 || return 1
  if [[ -z "$owner" ]]; then
    return 0
  fi
  # The process can exit between kill -0 and ps. "|| true" keeps that race from
  # failing the assignment, which would abort callers running with errexit and
  # pipefail; an empty parent pid then reads as "not running".
  parent_pid="$(ps -o ppid= -p "$target" 2>/dev/null | tr -d '[:space:]' || true)"
  [[ "$parent_pid" == "$owner" ]]
}
# Poll a set of background jobs and print progress lines until none is running.
#
#   $1    group label used as the prefix of every printed line
#   $2    seconds to sleep between polls
#   $3    seconds after which an unchanged summary is printed again
#   $4    python interpreter handed to parallels_log_progress_extract
#   $5    expected parent pid of the jobs (empty string skips the parent check)
#   $6..  jobs as repeated triples: <label> <pid> <log path>
#
# A job's line is printed whenever its log summary changes, or when the stale
# interval has passed since it was last printed. Reads RUN_DIR (optional) for
# the header line. Returns 2 without monitoring when the job arguments are not
# complete triples; otherwise returns 0 once every job has finished.
parallels_monitor_jobs_progress() {
  local group="$1"
  local interval_s="$2"
  local stale_s="$3"
  local python_bin="$4"
  local owner_pid="$5"
  shift 5
  # An incomplete triple would either trip `set -u` on the missing argument or
  # make `shift 3` fail without consuming anything and loop forever.
  if (( $# % 3 != 0 )); then
    printf 'error: parallels_monitor_jobs_progress expects <label> <pid> <log> triples; got %s job argument(s)\n' "$#" >&2
    return 2
  fi
  local labels=()
  local pids=()
  local logs=()
  local last_progress=()
  local last_print=()
  local i summary now running
  while [[ $# -gt 0 ]]; do
    labels+=("$1")
    pids+=("$2")
    logs+=("$3")
    last_progress+=("")
    last_print+=(0)
    shift 3
  done
  printf '==> %s progress; run dir: %s\n' "$group" "${RUN_DIR:-unknown}"
  while :; do
    running=0
    now=$SECONDS
    for ((i = 0; i < ${#pids[@]}; i++)); do
      if ! parallels_child_job_running "${pids[$i]}" "$owner_pid"; then
        continue
      fi
      running=1
      summary="$(parallels_log_progress_extract "$python_bin" "${logs[$i]}")"
      [[ -n "$summary" ]] || summary="waiting for first log line"
      if [[ "${last_progress[i]}" != "$summary" ]] || (( now - last_print[i] >= stale_s )); then
        printf '==> %s %s: %s\n' "$group" "${labels[$i]}" "$summary"
        last_progress[i]="$summary"
        last_print[i]=$now
      fi
    done
    (( running )) || break
    sleep "$interval_s"
  done
}

View File

@@ -1,6 +1,9 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$ROOT_DIR/scripts/e2e/lib/parallels-package-common.sh"
VM_NAME="Ubuntu 24.04.3 ARM64"
VM_NAME_EXPLICIT=0
SNAPSHOT_HINT="fresh"
@@ -37,6 +40,7 @@ TIMEOUT_VERIFY_S=90
TIMEOUT_ONBOARD_S=180
TIMEOUT_AGENT_S=180
TIMEOUT_GATEWAY_S=240
PHASE_STALE_WARN_S=60
FRESH_MAIN_STATUS="skip"
FRESH_MAIN_VERSION="skip"
@@ -462,16 +466,7 @@ resolve_latest_version() {
}
current_build_commit() {
python3 - <<'PY'
import json
import pathlib
path = pathlib.Path("dist/build-info.json")
if not path.exists():
print("")
else:
print(json.loads(path.read_text()).get("commit", ""))
PY
parallels_package_current_build_commit
}
source_tree_dirty_for_build() {
@@ -479,46 +474,50 @@ source_tree_dirty_for_build() {
}
acquire_build_lock() {
local owner_pid=""
while ! mkdir "$BUILD_LOCK_DIR" 2>/dev/null; do
if [[ -f "$BUILD_LOCK_DIR/pid" ]]; then
owner_pid="$(cat "$BUILD_LOCK_DIR/pid" 2>/dev/null || true)"
if [[ -n "$owner_pid" ]] && ! kill -0 "$owner_pid" >/dev/null 2>&1; then
warn "Removing stale Parallels build lock"
rm -rf "$BUILD_LOCK_DIR"
continue
fi
fi
sleep 1
done
printf '%s\n' "$$" >"$BUILD_LOCK_DIR/pid"
parallels_package_acquire_build_lock "$BUILD_LOCK_DIR"
}
release_build_lock() {
if [[ -d "$BUILD_LOCK_DIR" ]]; then
rm -rf "$BUILD_LOCK_DIR"
fi
parallels_package_release_build_lock "$BUILD_LOCK_DIR"
}
ensure_current_build() {
local head build_commit
acquire_build_lock
local head build_commit rc lock_owned
lock_owned=0
if [[ "${OPENCLAW_PARALLELS_BUILD_LOCK_HELD:-0}" != "1" ]]; then
acquire_build_lock
lock_owned=1
fi
head="$(git rev-parse HEAD)"
build_commit="$(current_build_commit)"
if [[ "$build_commit" == "$head" ]] && ! source_tree_dirty_for_build; then
release_build_lock
if [[ "$lock_owned" -eq 1 ]]; then
release_build_lock
fi
return
fi
say "Build dist for current head"
set +e
pnpm build
rc=$?
if [[ $rc -eq 0 ]]; then
parallels_package_assert_no_generated_drift
rc=$?
fi
build_commit="$(current_build_commit)"
release_build_lock
[[ "$build_commit" == "$head" ]] || die "dist/build-info.json still does not match HEAD after build"
set -e
if [[ "$lock_owned" -eq 1 ]]; then
release_build_lock
fi
[[ $rc -eq 0 ]] || return "$rc"
if [[ "$build_commit" != "$head" ]]; then
warn "dist/build-info.json still does not match HEAD after build"
return 1
fi
}
write_package_dist_inventory() {
node --import tsx --input-type=module --eval \
'import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());'
parallels_package_write_dist_inventory
}
extract_package_version_from_tgz() {
@@ -526,7 +525,7 @@ extract_package_version_from_tgz() {
}
pack_main_tgz() {
local short_head pkg packed_commit
local short_head pkg packed_commit rc
if [[ -n "$TARGET_PACKAGE_SPEC" ]]; then
say "Pack target package tgz: $TARGET_PACKAGE_SPEC"
pkg="$(
@@ -540,13 +539,21 @@ pack_main_tgz() {
return
fi
say "Pack current main tgz"
ensure_current_build
write_package_dist_inventory
short_head="$(git rev-parse --short HEAD)"
pkg="$(
npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \
| python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])'
)"
acquire_build_lock
set +e
{
OPENCLAW_PARALLELS_BUILD_LOCK_HELD=1 ensure_current_build &&
write_package_dist_inventory &&
short_head="$(git rev-parse --short HEAD)" &&
pkg="$(
npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \
| python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])'
)"
}
rc=$?
set -e
release_build_lock
[[ $rc -eq 0 ]] || return "$rc"
MAIN_TGZ_PATH="$MAIN_TGZ_DIR/openclaw-main-$short_head.tgz"
cp "$MAIN_TGZ_DIR/$pkg" "$MAIN_TGZ_PATH"
packed_commit="$(extract_package_build_commit_from_tgz "$MAIN_TGZ_PATH")"
@@ -780,10 +787,11 @@ phase_run() {
local timeout_s="$2"
shift 2
local log_path pid start rc timed_out
local log_path pid start rc timed_out next_warn summary
log_path="$(phase_log_path "$phase_id")"
say "$phase_id"
start=$SECONDS
next_warn=$((start + PHASE_STALE_WARN_S))
timed_out=0
(
@@ -792,6 +800,12 @@ phase_run() {
pid=$!
while kill -0 "$pid" >/dev/null 2>&1; do
if (( SECONDS >= next_warn )); then
summary="$(parallels_log_progress_extract python3 "$log_path")"
[[ -n "$summary" ]] || summary="waiting for first log line"
warn "$phase_id still running after $((SECONDS - start))s: $summary"
next_warn=$((SECONDS + PHASE_STALE_WARN_S))
fi
if (( SECONDS - start >= timeout_s )); then
timed_out=1
kill "$pid" >/dev/null 2>&1 || true

View File

@@ -3,6 +3,7 @@ set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$ROOT_DIR/scripts/e2e/lib/parallels-macos-common.sh"
source "$ROOT_DIR/scripts/e2e/lib/parallels-package-common.sh"
VM_NAME="macOS Tahoe"
SNAPSHOT_HINT="macOS 26.3.1 latest"
@@ -57,6 +58,7 @@ TIMEOUT_DASHBOARD_S=180
TIMEOUT_SNAPSHOT_S=360
TIMEOUT_CURRENT_USER_PRLCTL_S=45
TIMEOUT_DISCORD_S=180
PHASE_STALE_WARN_S=60
FRESH_MAIN_VERSION="skip"
LATEST_INSTALLED_VERSION="skip"
@@ -1135,7 +1137,7 @@ extract_package_build_commit_from_tgz() {
}
pack_main_tgz() {
local short_head pkg packed_commit
local short_head pkg packed_commit rc
if target_package_installs_directly; then
say "Use direct guest install for target package spec: $TARGET_PACKAGE_SPEC"
TARGET_EXPECT_VERSION="$(npm view "$TARGET_PACKAGE_SPEC" version --userconfig "$(mktemp)")"
@@ -1155,14 +1157,22 @@ pack_main_tgz() {
return
fi
say "Pack current main tgz"
ensure_current_build
write_package_dist_inventory
stage_pack_runtime_deps
short_head="$(git rev-parse --short HEAD)"
pkg="$(
npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \
| python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])'
)"
acquire_build_lock
set +e
{
OPENCLAW_PARALLELS_BUILD_LOCK_HELD=1 ensure_current_build &&
write_package_dist_inventory &&
stage_pack_runtime_deps &&
short_head="$(git rev-parse --short HEAD)" &&
pkg="$(
npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \
| python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])'
)"
}
rc=$?
set -e
release_build_lock
[[ $rc -eq 0 ]] || return "$rc"
MAIN_TGZ_PATH="$MAIN_TGZ_DIR/openclaw-main-$short_head.tgz"
cp "$MAIN_TGZ_DIR/$pkg" "$MAIN_TGZ_PATH"
packed_commit="$(extract_package_build_commit_from_tgz "$MAIN_TGZ_PATH")"
@@ -1182,16 +1192,7 @@ verify_target_version() {
}
current_build_commit() {
python3 - <<'PY'
import json
import pathlib
path = pathlib.Path("dist/build-info.json")
if not path.exists():
print("")
else:
print(json.loads(path.read_text()).get("commit", ""))
PY
parallels_package_current_build_commit
}
current_control_ui_ready() {
@@ -1199,49 +1200,59 @@ current_control_ui_ready() {
}
acquire_build_lock() {
local owner_pid=""
while ! mkdir "$BUILD_LOCK_DIR" 2>/dev/null; do
if [[ -f "$BUILD_LOCK_DIR/pid" ]]; then
owner_pid="$(cat "$BUILD_LOCK_DIR/pid" 2>/dev/null || true)"
if [[ -n "$owner_pid" ]] && ! kill -0 "$owner_pid" >/dev/null 2>&1; then
warn "Removing stale Parallels build lock"
rm -rf "$BUILD_LOCK_DIR"
continue
fi
fi
sleep 1
done
printf '%s\n' "$$" >"$BUILD_LOCK_DIR/pid"
parallels_package_acquire_build_lock "$BUILD_LOCK_DIR"
}
release_build_lock() {
if [[ -d "$BUILD_LOCK_DIR" ]]; then
rm -rf "$BUILD_LOCK_DIR"
fi
parallels_package_release_build_lock "$BUILD_LOCK_DIR"
}
ensure_current_build() {
local head build_commit
acquire_build_lock
local head build_commit rc lock_owned
lock_owned=0
if [[ "${OPENCLAW_PARALLELS_BUILD_LOCK_HELD:-0}" != "1" ]]; then
acquire_build_lock
lock_owned=1
fi
head="$(git rev-parse HEAD)"
build_commit="$(current_build_commit)"
if [[ "$build_commit" == "$head" ]] && current_control_ui_ready; then
release_build_lock
if [[ "$lock_owned" -eq 1 ]]; then
release_build_lock
fi
return
fi
say "Build dist for current head"
set +e
pnpm build
say "Build Control UI for current head"
pnpm ui:build
rc=$?
if [[ $rc -eq 0 ]]; then
parallels_package_assert_no_generated_drift
rc=$?
fi
if [[ $rc -eq 0 ]]; then
say "Build Control UI for current head"
pnpm ui:build
rc=$?
fi
build_commit="$(current_build_commit)"
release_build_lock
[[ "$build_commit" == "$head" ]] || die "dist/build-info.json still does not match HEAD after build"
current_control_ui_ready || die "dist/control-ui/index.html missing after ui build"
set -e
if [[ "$lock_owned" -eq 1 ]]; then
release_build_lock
fi
[[ $rc -eq 0 ]] || return "$rc"
if [[ "$build_commit" != "$head" ]]; then
warn "dist/build-info.json still does not match HEAD after build"
return 1
fi
if ! current_control_ui_ready; then
warn "dist/control-ui/index.html missing after ui build"
return 1
fi
}
write_package_dist_inventory() {
node --import tsx --input-type=module --eval \
'import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());'
parallels_package_write_dist_inventory
}
stage_pack_runtime_deps() {
@@ -1721,10 +1732,11 @@ phase_run() {
local timeout_s="$2"
shift 2
local log_path pid start rc timed_out
local log_path pid start rc timed_out next_warn summary
log_path="$(phase_log_path "$phase_id")"
say "$phase_id"
start=$SECONDS
next_warn=$((start + PHASE_STALE_WARN_S))
timed_out=0
(
@@ -1733,6 +1745,12 @@ phase_run() {
pid=$!
while child_job_running "$pid"; do
if (( SECONDS >= next_warn )); then
summary="$(parallels_log_progress_extract python3 "$log_path")"
[[ -n "$summary" ]] || summary="waiting for first log line"
warn "$phase_id still running after $((SECONDS - start))s: $summary"
next_warn=$((SECONDS + PHASE_STALE_WARN_S))
fi
if (( SECONDS - start >= timeout_s )); then
timed_out=1
kill "$pid" >/dev/null 2>&1 || true

View File

@@ -3,6 +3,7 @@ set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$ROOT_DIR/scripts/e2e/lib/parallels-macos-common.sh"
source "$ROOT_DIR/scripts/e2e/lib/parallels-package-common.sh"
MACOS_VM="macOS Tahoe"
WINDOWS_VM="Windows 11"
@@ -19,6 +20,7 @@ JSON_OUTPUT=0
RUN_DIR="$(mktemp -d /tmp/openclaw-parallels-npm-update.XXXXXX)"
MAIN_TGZ_DIR="$(mktemp -d)"
MAIN_TGZ_PATH=""
BUILD_LOCK_DIR="${TMPDIR:-/tmp}/openclaw-parallels-build.lock"
WINDOWS_UPDATE_SCRIPT_PATH=""
SERVER_PID=""
HOST_IP=""
@@ -31,7 +33,7 @@ UPDATE_EXPECTED_NEEDLE=""
API_KEY_VALUE=""
PROGRESS_INTERVAL_S=15
PROGRESS_STALE_S=60
TIMEOUT_UPDATE_S="${OPENCLAW_PARALLELS_NPM_UPDATE_TIMEOUT_S:-900}"
TIMEOUT_UPDATE_S="${OPENCLAW_PARALLELS_NPM_UPDATE_TIMEOUT_S:-1200}"
TIMEOUT_UPDATE_POLL_GRACE_S=60
child_job_running() {
@@ -328,26 +330,53 @@ sock.close()
PY
}
# Print the commit recorded for the current dist build by delegating to the
# shared parallels_package_current_build_commit helper.
current_build_commit() {
parallels_package_current_build_commit
}
# Succeed (status 0) when `git status --porcelain` reports any change under the
# build inputs listed below, i.e. the existing dist may not match the sources.
# git's stderr is discarded, so a git failure produces no output and reads as
# "not dirty" (status 1).
source_tree_dirty_for_build() {
[[ -n "$(git status --porcelain -- src ui packages extensions package.json pnpm-lock.yaml 'tsconfig*.json' 2>/dev/null)" ]]
}
# Make sure dist matches the current HEAD before packaging.
#
# Skips the build when dist/build-info.json already records HEAD and the build
# inputs are clean. Otherwise runs `pnpm build`, rejects generated-file drift,
# and then re-reads the recorded build commit to confirm the build really
# produced output for HEAD, so a stale dist is never packed.
#
# Returns 0 when dist is current; the failing step's status when the build or
# the drift check fails; 1 when the recorded commit still differs from HEAD.
# Statuses are captured with `|| rc=$?` so the function behaves the same
# whether or not the caller has errexit enabled.
ensure_current_build() {
  local build_commit head rc
  head="$(git rev-parse HEAD)"
  build_commit="$(current_build_commit)"
  if [[ "$build_commit" == "$head" ]] && ! source_tree_dirty_for_build; then
    return 0
  fi
  say "Build dist for current head"
  rc=0
  pnpm build || rc=$?
  if [[ $rc -eq 0 ]]; then
    parallels_package_assert_no_generated_drift || rc=$?
  fi
  [[ $rc -eq 0 ]] || return "$rc"
  build_commit="$(current_build_commit)"
  if [[ "$build_commit" != "$head" ]]; then
    printf 'warn: dist/build-info.json still does not match HEAD after build\n' >&2
    return 1
  fi
  return 0
}
write_package_dist_inventory() {
node --import tsx --input-type=module --eval \
'import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());'
parallels_package_write_dist_inventory
}
pack_main_tgz() {
local pkg
local pkg rc
CURRENT_HEAD="$(git rev-parse HEAD)"
CURRENT_HEAD_SHORT="$(git rev-parse --short=7 HEAD)"
ensure_current_build
write_package_dist_inventory
pkg="$(
npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \
| "$PYTHON_BIN" -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])'
)"
parallels_package_acquire_build_lock "$BUILD_LOCK_DIR"
set +e
{
ensure_current_build &&
write_package_dist_inventory &&
pkg="$(
npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \
| "$PYTHON_BIN" -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])'
)"
}
rc=$?
set -e
parallels_package_release_build_lock "$BUILD_LOCK_DIR"
[[ $rc -eq 0 ]] || return "$rc"
MAIN_TGZ_PATH="$MAIN_TGZ_DIR/openclaw-main-$CURRENT_HEAD_SHORT.tgz"
cp "$MAIN_TGZ_DIR/$pkg" "$MAIN_TGZ_PATH"
}
@@ -464,20 +493,36 @@ function Wait-GatewayRpcReady {
for ($attempt = 1; $attempt -le $Attempts; $attempt++) {
Write-ProgressLog "update.gateway-status.attempt-$attempt"
try {
Invoke-Logged 'openclaw gateway status' { & $OpenClawPath gateway status --deep --require-rpc }
return
$statusOutput = Invoke-CaptureLogged 'openclaw gateway status' { & $OpenClawPath gateway status --deep --require-rpc }
if ($statusOutput -match 'Read probe:\s*failed') {
throw 'gateway status returned without RPC read readiness'
}
return $true
} catch {
if ($attempt -ge $Attempts) {
throw
return $false
}
Write-ProgressLog "update.gateway-status.retry-$attempt"
Start-Sleep -Seconds $SleepSeconds
}
}
return $false
}
function Stop-GatewayScheduledTaskIfPresent {
$previousNativeErrorPreference = $PSNativeCommandUseErrorActionPreference
try {
$PSNativeCommandUseErrorActionPreference = $false
schtasks /End /TN 'OpenClaw Gateway' 2>$null | Out-Null
} catch {
} finally {
$PSNativeCommandUseErrorActionPreference = $previousNativeErrorPreference
}
}
function Stop-OpenClawGatewayProcesses {
Write-ProgressLog 'update.stop-old-gateway'
Stop-GatewayScheduledTaskIfPresent
$patterns = @(
'openclaw-gateway',
'openclaw.*gateway --port 18789',
@@ -508,7 +553,112 @@ function Stop-OpenClawGatewayProcesses {
ForEach-Object {
Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue
}
Start-Sleep -Seconds 2
for ($attempt = 1; $attempt -le 20; $attempt++) {
$listeners = Get-NetTCPConnection -LocalPort 18789 -State Listen -ErrorAction SilentlyContinue
if (-not $listeners) {
return
}
$listeners |
ForEach-Object {
Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue
}
Start-Sleep -Seconds 1
}
$remaining = Get-NetTCPConnection -LocalPort 18789 -State Listen -ErrorAction SilentlyContinue
if ($remaining) {
$pids = ($remaining | Select-Object -ExpandProperty OwningProcess -Unique) -join ', '
throw "gateway listener still active on port 18789 after stop attempts: $pids"
}
}
function Stop-OpenClawUpdateProcesses {
Write-ProgressLog 'update.stop-stale-update'
$patterns = @(
'openclaw.* update --tag ',
'openclaw.* completion --write-state'
)
Get-CimInstance Win32_Process -ErrorAction SilentlyContinue |
Where-Object {
$commandLine = $_.CommandLine
if (-not $commandLine) {
$false
} else {
$matched = $false
foreach ($pattern in $patterns) {
if ($commandLine -match $pattern) {
$matched = $true
break
}
}
$matched
}
} |
Sort-Object ParentProcessId -Descending |
ForEach-Object {
Stop-Process -Id $_.ProcessId -Force -ErrorAction SilentlyContinue
}
}
function Invoke-OpenClawUpdateWithTimeout {
param(
[Parameter(Mandatory = $true)][string]$OpenClawPath,
[Parameter(Mandatory = $true)][string]$UpdateTarget,
[int]$TimeoutSeconds = 600
)
$updateJob = Start-Job -ScriptBlock {
param([string]$Path, [string]$Target)
$output = & $Path update --tag $Target --yes --json *>&1
[pscustomobject]@{
ExitCode = $LASTEXITCODE
Output = ($output | Out-String).Trim()
}
} -ArgumentList $OpenClawPath, $UpdateTarget
$completed = Wait-Job $updateJob -Timeout $TimeoutSeconds
if ($null -ne $completed) {
$result = Receive-Job $updateJob
if ($null -ne $result.Output -and $result.Output.Length -gt 0) {
$result.Output | Tee-Object -FilePath $LogPath -Append | Out-Null
}
Remove-Job $updateJob -Force -ErrorAction SilentlyContinue
if ($result.ExitCode -ne 0) {
throw "openclaw update failed with exit code $($result.ExitCode)"
}
return
}
Stop-Job $updateJob -ErrorAction SilentlyContinue
Remove-Job $updateJob -Force -ErrorAction SilentlyContinue
Write-ProgressLog 'update.openclaw-update.timeout'
'openclaw update timed out after package install window; killing stale update/completion processes and verifying installed version' | Tee-Object -FilePath $LogPath -Append | Out-Null
Stop-OpenClawUpdateProcesses
}
function Start-GatewayRunFallback {
param(
[Parameter(Mandatory = $true)][string]$OpenClawPath
)
Write-ProgressLog 'update.gateway-run-fallback'
Stop-OpenClawGatewayProcesses
$entry = Join-Path $env:APPDATA 'npm\node_modules\openclaw\dist\index.js'
if (-not (Test-Path $entry)) {
throw "openclaw dist entry missing: $entry"
}
$node = (Get-Command node.exe -ErrorAction Stop).Source
$stdout = Join-Path $env:TEMP 'openclaw-parallels-npm-update-gateway.log'
$stderr = Join-Path $env:TEMP 'openclaw-parallels-npm-update-gateway.err.log'
Start-Process -FilePath $node -ArgumentList @($entry, 'gateway', 'run', '--bind', 'loopback', '--port', '18789', '--force') -WindowStyle Hidden -RedirectStandardOutput $stdout -RedirectStandardError $stderr | Out-Null
if (-not (Wait-GatewayRpcReady -OpenClawPath $OpenClawPath -Attempts 20 -SleepSeconds 3)) {
if (Test-Path $stdout) {
Get-Content $stdout -Tail 80 | Tee-Object -FilePath $LogPath -Append | Out-Null
}
if (Test-Path $stderr) {
Get-Content $stderr -Tail 80 | Tee-Object -FilePath $LogPath -Append | Out-Null
}
throw 'gateway did not become RPC-ready after run fallback'
}
}
function Complete-WorkspaceSetup {
@@ -568,17 +718,15 @@ function Restart-GatewayWithRecovery {
Remove-Job $restartJob -Force -ErrorAction SilentlyContinue
Write-ProgressLog 'update.gateway-status'
try {
Wait-GatewayRpcReady -OpenClawPath $OpenClawPath
if (Wait-GatewayRpcReady -OpenClawPath $OpenClawPath) {
return
} catch {
if (-not $restartFailed) {
throw
}
Write-ProgressLog 'update.gateway-start-recover'
Invoke-Logged 'openclaw gateway start' { & $OpenClawPath gateway start }
Write-ProgressLog 'update.gateway-status-recover'
Wait-GatewayRpcReady -OpenClawPath $OpenClawPath
}
Write-ProgressLog 'update.gateway-start-recover'
Stop-OpenClawGatewayProcesses
Invoke-Logged 'openclaw gateway start' { & $OpenClawPath gateway start }
Write-ProgressLog 'update.gateway-status-recover'
if (-not (Wait-GatewayRpcReady -OpenClawPath $OpenClawPath)) {
Start-GatewayRunFallback -OpenClawPath $OpenClawPath
}
}
@@ -597,7 +745,7 @@ try {
$openclaw = Join-Path $env:APPDATA 'npm\openclaw.cmd'
Stop-OpenClawGatewayProcesses
Write-ProgressLog 'update.openclaw-update'
Invoke-Logged 'openclaw update' { & $openclaw update --tag $UpdateTarget --yes --json }
Invoke-OpenClawUpdateWithTimeout -OpenClawPath $openclaw -UpdateTarget $UpdateTarget
Write-ProgressLog 'update.verify-version'
$version = Invoke-CaptureLogged 'openclaw --version' { & $openclaw --version }
if ($ExpectedNeedle -and $version -notmatch [regex]::Escape($ExpectedNeedle)) {
@@ -617,7 +765,7 @@ try {
Restart-GatewayWithRecovery -OpenClawPath $openclaw
Complete-WorkspaceSetup
Write-ProgressLog 'update.agent-turn'
Invoke-CaptureLogged 'openclaw agent' { & $openclaw agent --agent main --session-id $SessionId --message 'Reply with exact ASCII text OK only.' --json } | Out-Null
Invoke-CaptureLogged 'openclaw agent' { & $openclaw agent --local --agent main --session-id $SessionId --message 'Reply with exact ASCII text OK only.' --json } | Out-Null
$exitCode = $LASTEXITCODE
if ($null -eq $exitCode) {
$exitCode = 0
@@ -791,20 +939,62 @@ Write-Output \$version
if ('$expected_needle' -and \$version -notmatch [regex]::Escape('$expected_needle')) {
throw "version mismatch after transport loss: expected substring $expected_needle"
}
function Test-GatewayWritable {
param([string]\$Path)
\$statusOutput = & \$Path gateway status --deep --require-rpc *>&1
if (\$null -ne \$statusOutput) {
\$statusOutput | Write-Output
}
if (\$LASTEXITCODE -ne 0) {
return \$false
}
\$statusText = (\$statusOutput | Out-String)
return (\$statusText -notmatch 'Read probe:\s*failed')
}
function Stop-GatewayListeners {
\$previousNativeErrorPreference = \$PSNativeCommandUseErrorActionPreference
try {
\$PSNativeCommandUseErrorActionPreference = \$false
schtasks /End /TN 'OpenClaw Gateway' 2>\$null | Out-Null
} catch {
} finally {
\$PSNativeCommandUseErrorActionPreference = \$previousNativeErrorPreference
}
Get-CimInstance Win32_Process -ErrorAction SilentlyContinue |
Where-Object {
\$_.CommandLine -and (
\$_.CommandLine -match 'openclaw.*gateway --port 18789' -or
\$_.CommandLine -match 'openclaw.*gateway run' -or
\$_.CommandLine -match 'dist\\\\index\\.js gateway --port 18789'
)
} |
ForEach-Object {
Stop-Process -Id \$_.ProcessId -Force -ErrorAction SilentlyContinue
}
for (\$i = 0; \$i -lt 20; \$i++) {
\$listeners = Get-NetTCPConnection -LocalPort 18789 -State Listen -ErrorAction SilentlyContinue
if (-not \$listeners) {
return
}
\$listeners | ForEach-Object {
Stop-Process -Id \$_.OwningProcess -Force -ErrorAction SilentlyContinue
}
Start-Sleep -Seconds 1
}
}
\$gatewayReady = \$false
for (\$i = 0; \$i -lt 6; \$i++) {
& \$openclaw gateway status --deep --require-rpc
if (\$LASTEXITCODE -eq 0) {
if (Test-GatewayWritable \$openclaw) {
\$gatewayReady = \$true
break
}
Start-Sleep -Seconds 2
}
if (-not \$gatewayReady) {
Stop-GatewayListeners
& \$openclaw gateway restart
for (\$i = 0; \$i -lt 6; \$i++) {
& \$openclaw gateway status --deep --require-rpc
if (\$LASTEXITCODE -eq 0) {
if (Test-GatewayWritable \$openclaw) {
\$gatewayReady = \$true
break
}
@@ -812,10 +1002,25 @@ if (-not \$gatewayReady) {
}
}
if (-not \$gatewayReady) {
Stop-GatewayListeners
& \$openclaw gateway start
for (\$i = 0; \$i -lt 6; \$i++) {
& \$openclaw gateway status --deep --require-rpc
if (\$LASTEXITCODE -eq 0) {
if (Test-GatewayWritable \$openclaw) {
\$gatewayReady = \$true
break
}
Start-Sleep -Seconds 2
}
}
if (-not \$gatewayReady) {
Stop-GatewayListeners
\$entry = Join-Path \$env:APPDATA 'npm\\node_modules\\openclaw\\dist\\index.js'
\$node = (Get-Command node.exe -ErrorAction Stop).Source
\$stdout = Join-Path \$env:TEMP 'openclaw-parallels-npm-update-recover-gateway.log'
\$stderr = Join-Path \$env:TEMP 'openclaw-parallels-npm-update-recover-gateway.err.log'
Start-Process -FilePath \$node -ArgumentList @(\$entry, 'gateway', 'run', '--bind', 'loopback', '--port', '18789', '--force') -WindowStyle Hidden -RedirectStandardOutput \$stdout -RedirectStandardError \$stderr | Out-Null
for (\$i = 0; \$i -lt 20; \$i++) {
if (Test-GatewayWritable \$openclaw) {
\$gatewayReady = \$true
break
}
@@ -848,7 +1053,7 @@ New-Item -ItemType Directory -Path \$stateDir -Force | Out-Null
}
'@ | Set-Content -Path (Join-Path \$stateDir 'workspace-state.json') -Encoding UTF8
Remove-Item (Join-Path \$workspace 'BOOTSTRAP.md') -Force -ErrorAction SilentlyContinue
& \$openclaw agent --agent main --session-id 'parallels-npm-update-windows-transport-recovery-$expected_needle' --message 'Reply with exact ASCII text OK only.' --json
& \$openclaw agent --local --agent main --session-id 'parallels-npm-update-windows-transport-recovery-$expected_needle' --message 'Reply with exact ASCII text OK only.' --json
EOF
)"
local rc=$?
@@ -883,37 +1088,6 @@ stop_timeout_guard() {
wait "$pid" 2>/dev/null || true
}
extract_log_progress() {
local log_path="$1"
"$PYTHON_BIN" - "$log_path" <<'PY'
import pathlib
import sys
path = pathlib.Path(sys.argv[1])
if not path.exists():
print("")
raise SystemExit(0)
text = path.read_text(encoding="utf-8", errors="replace")
lines = [line.strip() for line in text.splitlines() if line.strip()]
for line in reversed(lines):
if line.startswith("==> "):
print(line[4:].strip())
raise SystemExit(0)
for line in reversed(lines):
if line.startswith("warn:") or line.startswith("error:"):
print(line)
raise SystemExit(0)
if lines:
print(lines[-1][:240])
else:
print("")
PY
}
dump_log_tail() {
local label="$1"
local log_path="$2"
@@ -925,44 +1099,7 @@ dump_log_tail() {
# monitor_jobs_progress GROUP [LABEL PID LOG]...
# Reports progress for a group of background jobs until none of them is running.
# NOTE(review): this listing appears to interleave the pre-change inline polling loop
# with the post-change one-line delegation to parallels_monitor_jobs_progress (the hunk
# header above reads "-925,44 +1099,7") — confirm which lines are live before editing.
monitor_jobs_progress() {
local group="$1"
shift
# Parallel arrays, one slot per job.
local labels=()
local pids=()
local logs=()
local last_progress=()
local last_print=()
local i summary now running
# Consume the remaining arguments as (label, pid, log path) triples.
while [[ $# -gt 0 ]]; do
labels+=("$1")
pids+=("$2")
logs+=("$3")
last_progress+=("")
last_print+=(0)
shift 3
done
say "$group progress; run dir: $RUN_DIR"
while :; do
running=0
now=$SECONDS
for ((i = 0; i < ${#pids[@]}; i++)); do
# Jobs that already exited are skipped.
if ! child_job_running "${pids[$i]}"; then
continue
fi
running=1
summary="$(extract_log_progress "${logs[$i]}")"
[[ -n "$summary" ]] || summary="waiting for first log line"
# Print when the summary changed, or when PROGRESS_STALE_S seconds passed since the last print.
if [[ "${last_progress[i]}" != "$summary" ]] || (( now - last_print[i] >= PROGRESS_STALE_S )); then
say "$group ${labels[$i]}: $summary"
last_progress[i]="$summary"
last_print[i]=$now
fi
done
# Stop once no job in the group is running.
(( running )) || break
sleep "$PROGRESS_INTERVAL_S"
done
# Delegation to the shared helper, passing the poll interval, stale threshold,
# Python interpreter, this shell's PID and the remaining arguments.
parallels_monitor_jobs_progress "$group" "$PROGRESS_INTERVAL_S" "$PROGRESS_STALE_S" "$PYTHON_BIN" "$$" "$@"
}
extract_last_version() {

View File

@@ -1,6 +1,9 @@
#!/usr/bin/env bash
# Abort on command failure, on unset variables, and on a failure anywhere in a pipeline.
set -euo pipefail
# Repository root: two directories above the directory that holds this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Shared helper library; presumably defines the parallels_package_* functions called
# further down — TODO confirm against the library.
source "$ROOT_DIR/scripts/e2e/lib/parallels-package-common.sh"
# Defaults for the guest VM name, snapshot hint and run mode.
# NOTE(review): presumably overridable by CLI flags parsed later in the script — verify.
VM_NAME="Windows 11"
SNAPSHOT_HINT="pre-openclaw-native-e2e-2026-03-12"
MODE="both"
@@ -41,7 +44,7 @@ BUILD_LOCK_DIR="${TMPDIR:-/tmp}/openclaw-parallels-build.lock"
# Time budgets; presumably seconds (the `_S` suffix, and phase_run compares its
# timeout argument against $SECONDS).
TIMEOUT_SNAPSHOT_S=240
TIMEOUT_GIT_SETUP_S=1200
TIMEOUT_INSTALL_S=420
# The update budget can be overridden through OPENCLAW_PARALLELS_WINDOWS_UPDATE_TIMEOUT_S.
# NOTE(review): TIMEOUT_UPDATE_S is assigned twice in this listing (defaults 1800, then
# 1200); read literally the later assignment wins. This looks like the old and new lines
# of the hunk shown together — confirm only the 1200 default is live.
TIMEOUT_UPDATE_S="${OPENCLAW_PARALLELS_WINDOWS_UPDATE_TIMEOUT_S:-1800}"
TIMEOUT_UPDATE_S="${OPENCLAW_PARALLELS_WINDOWS_UPDATE_TIMEOUT_S:-1200}"
TIMEOUT_UPDATE_POLL_GRACE_S=60
TIMEOUT_VERIFY_S=120
TIMEOUT_ONBOARD_S=600
@@ -49,6 +52,7 @@ TIMEOUT_ONBOARD_PHASE_S=$((TIMEOUT_ONBOARD_S + 120))
# verify_gateway_reachable runs six 30s probes plus short retry sleeps.
TIMEOUT_GATEWAY_S=420
TIMEOUT_AGENT_S=600
# Seconds between the "still running after Ns" warnings that phase_run emits while a
# phase's child job is alive (phase_run schedules the next warning from $SECONDS).
PHASE_STALE_WARN_S=60
FRESH_MAIN_STATUS="skip"
FRESH_MAIN_VERSION="skip"
@@ -691,10 +695,11 @@ phase_run() {
local timeout_s="$2"
shift 2
local log_path pid start rc timed_out
local log_path pid start rc timed_out next_warn summary
log_path="$(phase_log_path "$phase_id")"
say "$phase_id"
start=$SECONDS
next_warn=$((start + PHASE_STALE_WARN_S))
timed_out=0
(
@@ -703,6 +708,12 @@ phase_run() {
pid=$!
while child_job_running "$pid"; do
if (( SECONDS >= next_warn )); then
summary="$(parallels_log_progress_extract python3 "$log_path")"
[[ -n "$summary" ]] || summary="waiting for first log line"
warn "$phase_id still running after $((SECONDS - start))s: $summary"
next_warn=$((SECONDS + PHASE_STALE_WARN_S))
fi
if (( SECONDS - start >= timeout_s )); then
timed_out=1
kill "$pid" >/dev/null 2>&1 || true
@@ -848,16 +859,7 @@ PY
}
# Prints the commit recorded for the current dist build.
# The inline Python reads dist/build-info.json and prints its "commit" value; it prints
# an empty line when the file is missing or has no "commit" key.
# NOTE(review): the inline Python and the call to parallels_package_current_build_commit
# appear to be the old and new bodies of this function shown together (hunk header
# "-848,16 +859,7") — confirm which one is live.
current_build_commit() {
python3 - <<'PY'
import json
import pathlib
path = pathlib.Path("dist/build-info.json")
if not path.exists():
print("")
else:
print(json.loads(path.read_text()).get("commit", ""))
PY
# Shared-library counterpart of the inline lookup above.
parallels_package_current_build_commit
}
source_tree_dirty_for_build() {
@@ -865,46 +867,50 @@ source_tree_dirty_for_build() {
}
# Acquires the build lock rooted at $BUILD_LOCK_DIR, waiting until it is available.
# NOTE(review): the inline mkdir loop and the call to parallels_package_acquire_build_lock
# appear to be the old and new bodies shown together — confirm which one is live.
acquire_build_lock() {
local owner_pid=""
# mkdir either creates the directory or fails, so its success is used as the lock.
while ! mkdir "$BUILD_LOCK_DIR" 2>/dev/null; do
if [[ -f "$BUILD_LOCK_DIR/pid" ]]; then
owner_pid="$(cat "$BUILD_LOCK_DIR/pid" 2>/dev/null || true)"
# A recorded owner that no longer answers `kill -0` is treated as dead: drop its lock and retry at once.
if [[ -n "$owner_pid" ]] && ! kill -0 "$owner_pid" >/dev/null 2>&1; then
warn "Removing stale Parallels build lock"
rm -rf "$BUILD_LOCK_DIR"
continue
fi
fi
sleep 1
done
# Record this shell's PID so later waiters can run the stale-owner check.
printf '%s\n' "$$" >"$BUILD_LOCK_DIR/pid"
# Shared-library lock acquisition for the same lock directory.
parallels_package_acquire_build_lock "$BUILD_LOCK_DIR"
}
# Releases the build lock rooted at $BUILD_LOCK_DIR.
# NOTE(review): the inline removal and the call to parallels_package_release_build_lock
# appear to be the old and new bodies shown together — confirm which one is live.
release_build_lock() {
# Remove the lock directory (and the pid file inside it) when it exists.
if [[ -d "$BUILD_LOCK_DIR" ]]; then
rm -rf "$BUILD_LOCK_DIR"
fi
# Shared-library release for the same lock directory.
parallels_package_release_build_lock "$BUILD_LOCK_DIR"
}
# Ensures dist/ was built from the current git HEAD, running `pnpm build` when the
# recorded build commit differs from HEAD or source_tree_dirty_for_build reports changes.
# Returns non-zero when the build or the generated-drift check fails, or when
# dist/build-info.json still does not name HEAD afterwards.
# NOTE(review): this listing interleaves pre- and post-change lines of the hunk (two
# `local` declarations, an unconditional acquire_build_lock followed by a guarded one,
# unconditional and guarded release_build_lock calls, a `die` check followed by a
# warn/return check) — confirm which lines are live before editing.
ensure_current_build() {
local head build_commit
acquire_build_lock
local head build_commit rc lock_owned
lock_owned=0
# Skip locking when the caller signals, via OPENCLAW_PARALLELS_BUILD_LOCK_HELD=1,
# that it already holds the build lock; lock_owned tracks whether to release here.
if [[ "${OPENCLAW_PARALLELS_BUILD_LOCK_HELD:-0}" != "1" ]]; then
acquire_build_lock
lock_owned=1
fi
head="$(git rev-parse HEAD)"
build_commit="$(current_build_commit)"
# Fast path: the recorded build commit equals HEAD and the tree is not dirty.
if [[ "$build_commit" == "$head" ]] && ! source_tree_dirty_for_build; then
release_build_lock
if [[ "$lock_owned" -eq 1 ]]; then
release_build_lock
fi
return
fi
say "Build dist for current head"
# errexit is switched off so a failing build still reaches the lock release below.
set +e
pnpm build
rc=$?
# The drift check only runs after a successful build; its status replaces rc.
if [[ $rc -eq 0 ]]; then
parallels_package_assert_no_generated_drift
rc=$?
fi
build_commit="$(current_build_commit)"
release_build_lock
[[ "$build_commit" == "$head" ]] || die "dist/build-info.json still does not match HEAD after build"
# NOTE(review): this re-enables errexit unconditionally. pack_main_tgz calls this
# function inside its own `set +e` region, so errexit may be back on there before its
# release_build_lock runs — confirm the caller's errexit state is meant to be overridden.
set -e
if [[ "$lock_owned" -eq 1 ]]; then
release_build_lock
fi
[[ $rc -eq 0 ]] || return "$rc"
if [[ "$build_commit" != "$head" ]]; then
warn "dist/build-info.json still does not match HEAD after build"
return 1
fi
}
# Writes the package dist inventory for the current working directory.
# NOTE(review): the inline node invocation and the call to
# parallels_package_write_dist_inventory appear to be the old and new bodies shown
# together — confirm which one is live.
write_package_dist_inventory() {
# Runs writePackageDistInventory from src/infra/package-dist-inventory.ts through the tsx loader.
node --import tsx --input-type=module --eval \
'import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());'
# Shared-library counterpart of the inline invocation above.
parallels_package_write_dist_inventory
}
ensure_guest_git() {
@@ -951,7 +957,7 @@ ensure_mingit_zip() {
}
pack_main_tgz() {
local short_head pkg packed_commit
local short_head pkg packed_commit rc
ensure_mingit_zip
if [[ -n "$TARGET_PACKAGE_SPEC" ]]; then
say "Pack target package tgz: $TARGET_PACKAGE_SPEC"
@@ -966,13 +972,21 @@ pack_main_tgz() {
return
fi
say "Pack current main tgz"
ensure_current_build
write_package_dist_inventory
short_head="$(git rev-parse --short HEAD)"
pkg="$(
npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \
| python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])'
)"
acquire_build_lock
set +e
{
OPENCLAW_PARALLELS_BUILD_LOCK_HELD=1 ensure_current_build &&
write_package_dist_inventory &&
short_head="$(git rev-parse --short HEAD)" &&
pkg="$(
npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \
| python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])'
)"
}
rc=$?
set -e
release_build_lock
[[ $rc -eq 0 ]] || return "$rc"
MAIN_TGZ_PATH="$MAIN_TGZ_DIR/openclaw-main-$short_head.tgz"
cp "$MAIN_TGZ_DIR/$pkg" "$MAIN_TGZ_PATH"
packed_commit="$(extract_package_build_commit_from_tgz "$MAIN_TGZ_PATH")"

View File

@@ -193,6 +193,7 @@ const { defaultRuntime } = await import("../runtime.js");
const { updateCommand, updateStatusCommand, updateWizardCommand } = await import("./update-cli.js");
const updateCliShared = await import("./update-cli/shared.js");
const { resolveGitInstallDir } = updateCliShared;
const { spawnSync } = await import("node:child_process");
type UpdateCliScenario = {
name: string;
@@ -458,6 +459,19 @@ describe("update-cli", () => {
setStdoutTty(false);
});
// Verifies that the completion-cache refresh child process is spawned with a 30s timeout.
it("bounds completion cache refresh during update follow-up", async () => {
  const packageRoot = createCaseDir("openclaw-completion-timeout");
  const expectedArgs = [path.join(packageRoot, "openclaw.mjs"), "completion", "--write-state"];
  const expectedOptions = expect.objectContaining({ timeout: 30_000 });
  // Make the entry script look present so the refresh reaches the spawn call.
  pathExists.mockResolvedValue(true);

  await updateCliShared.tryWriteCompletionCache(packageRoot, false);

  expect(spawnSync).toHaveBeenCalledWith(expect.any(String), expectedArgs, expectedOptions);
});
it("respawns into the updated package root before running post-update tasks", async () => {
const { entrypoints } = setupUpdatedRootRefresh();

View File

@@ -258,6 +258,8 @@ export async function resolveGlobalManager(params: {
return byPresence ?? "npm";
}
/**
 * Timeout, in milliseconds, passed as the `timeout` option when `tryWriteCompletionCache`
 * runs the completion-cache writer.
 * NOTE(review): presumably the callee is `spawnSync` — the call itself is outside this view.
 */
const COMPLETION_CACHE_WRITE_TIMEOUT_MS = 30_000;
export async function tryWriteCompletionCache(root: string, jsonMode: boolean): Promise<void> {
const binPath = path.join(root, "openclaw.mjs");
if (!(await pathExists(binPath))) {
@@ -268,6 +270,7 @@ export async function tryWriteCompletionCache(root: string, jsonMode: boolean):
cwd: root,
env: process.env,
encoding: "utf-8",
timeout: COMPLETION_CACHE_WRITE_TIMEOUT_MS,
});
if (result.error) {

View File

@@ -245,6 +245,9 @@ async function collectInstalledPathErrors(params: {
if (actualSet !== null && params.unexpectedMessage) {
const expectedSet = new Set(params.expectedFiles);
for (const relativePath of params.actualFiles ?? []) {
if (NPM_UPDATE_COMPAT_SIDECAR_PATHS.has(relativePath)) {
continue;
}
if (!expectedSet.has(relativePath)) {
errors.push(params.unexpectedMessage(relativePath));
}