From 2e38e09b04cc06091c05be4015211beef82bcf9a Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 22 Apr 2026 21:59:59 +0100 Subject: [PATCH] test: harden parallels smoke harness --- .../skills/openclaw-parallels-smoke/SKILL.md | 7 +- scripts/e2e/lib/parallels-package-common.sh | 158 ++++++++ scripts/e2e/parallels-linux-smoke.sh | 98 ++--- scripts/e2e/parallels-macos-smoke.sh | 110 +++--- scripts/e2e/parallels-npm-update-smoke.sh | 341 ++++++++++++------ scripts/e2e/parallels-windows-smoke.sh | 100 ++--- src/cli/update-cli.test.ts | 14 + src/cli/update-cli/shared.ts | 3 + src/infra/update-global.ts | 3 + 9 files changed, 598 insertions(+), 236 deletions(-) create mode 100644 scripts/e2e/lib/parallels-package-common.sh diff --git a/.agents/skills/openclaw-parallels-smoke/SKILL.md b/.agents/skills/openclaw-parallels-smoke/SKILL.md index b6b8a34be43..fb4308d5eb5 100644 --- a/.agents/skills/openclaw-parallels-smoke/SKILL.md +++ b/.agents/skills/openclaw-parallels-smoke/SKILL.md @@ -22,16 +22,17 @@ Use this skill for Parallels guest workflows and smoke interpretation. Do not lo - Windows: `90m` - aggregate npm-update wrapper: `150m` If a lane hits the cap, stop there, inspect the newest `/tmp/openclaw-parallels-*` run directory and phase log, then fix or rerun the smallest affected lane. Do not keep waiting on a capped lane. -- Actual OpenClaw npm install/update phases are a stricter budget than whole lanes: install phases should finish within 7 minutes, and update phases should finish within 5 minutes. If a phase named `install-main`, `install-latest`, `install-baseline`, or `install-baseline-package` exceeds 420s, or a phase named `update-dev` / same-guest `openclaw update` exceeds 300s, treat it as a failure/harness bug and start diagnosis from that phase log. Do not wait for a longer lane cap. 
+- Actual OpenClaw npm install/update phases are a stricter signal than whole-lane caps: install phases should normally finish within 7 minutes, and update phases should normally show meaningful progress within 5 minutes. If a phase named `install-main`, `install-latest`, `install-baseline`, or `install-baseline-package` exceeds 420s, or a phase named `update-dev` / same-guest `openclaw update` exceeds 300s without new markers, start diagnosis from that phase log and guest process state. Current Windows update phases can still pass after roughly 10-15 minutes because `doctor --fix` may install bundled plugin runtime deps; keep the script hard cap near 20 minutes unless the log is truly stale. - For a full OS matrix, prefer running independent guest-family lanes in parallel when host capacity allows: - `timeout --foreground 75m pnpm test:parallels:macos -- --json` - `timeout --foreground 90m pnpm test:parallels:windows -- --json` - `timeout --foreground 75m pnpm test:parallels:linux -- --json` - Keep each lane in its own shell/session and track the run directory for each one. + Keep each lane in its own shell/session and track the run directory for each one. Before starting the matrix, run any required host build/package gate to completion. When current-main tgz packaging is needed, the smoke scripts hold a shared package lock through `pnpm build`, inventory/staging, and `npm pack`; if that lock is missing or broken, serialize the matrix instead of accepting concurrent `dist` mutation. - Do not run multiple smoke lanes against the same guest family at once. Tahoe lanes share the host HTTP port, and Windows/Linux lanes can collide on snapshot restore/start state if two jobs touch the same VM concurrently. - Do not run the aggregate `pnpm test:parallels:npm-update` wrapper in parallel with individual macOS/Windows/Linux smoke lanes; it touches the same guest families and snapshots. 
-- Do not start Parallels lanes while any host command may rebuild, clean, or restage `dist` (`pnpm build`, `pnpm ui:build`, `pnpm release:check`, `pnpm test:install:smoke`, npm pack/install smoke, or Docker lanes that run package/build prep). Run the build/package gates first, let them finish, then start the VM matrix. Concurrent `dist` mutation can make host `npm pack` fail with missing files and wastes a full VM cycle. +- Do not start Parallels lanes while any unrelated host command may rebuild, clean, or restage `dist` (`pnpm build`, `pnpm ui:build`, `pnpm release:check`, `pnpm test:install:smoke`, npm pack/install smoke, or Docker lanes that run package/build prep). Run unrelated build/package gates first, let them finish, then start the VM matrix. Concurrent `dist` mutation can make host `npm pack` fail with missing files and wastes a full VM cycle. - While running or optimizing the matrix, record wall-clock duration per lane and the slowest phase from `/tmp/openclaw-parallels-*` logs. Use that timing before changing smoke order, timeouts, or helper behavior. +- If a host build changes tracked generated files such as `src/canvas-host/a2ui/.bundle.hash`, stop before spending VM time. Commit the generated artifact separately or fix the generator drift, then rerun the smallest affected lane. - If `main` is moving under active multi-agent work, prefer a detached worktree pinned to one commit for long Parallels suites. The smoke scripts now verify the packed tgz commit instead of live `git rev-parse HEAD`, but a pinned worktree still avoids noisy rebuild/version drift during reruns. - For `openclaw update --channel dev` lanes, remember the guest clones GitHub `main`, not your local worktree. If a local fix exists but the rerun still fails inside the cloned dev checkout, do not treat that as disproof of the fix until the branch has been pushed. 
- For `prlctl exec`, pass the VM name before `--current-user` (`prlctl exec "$VM" --current-user ...`), not the other way around. diff --git a/scripts/e2e/lib/parallels-package-common.sh b/scripts/e2e/lib/parallels-package-common.sh new file mode 100644 index 00000000000..84141d6a89a --- /dev/null +++ b/scripts/e2e/lib/parallels-package-common.sh @@ -0,0 +1,158 @@ +#!/usr/bin/env bash + +parallels_package_current_build_commit() { + python3 - <<'PY' +import json +import pathlib + +path = pathlib.Path("dist/build-info.json") +if not path.exists(): + print("") +else: + print(json.loads(path.read_text()).get("commit", "")) +PY +} + +parallels_package_acquire_build_lock() { + local lock_dir="$1" + local owner_pid="" + while ! mkdir "$lock_dir" 2>/dev/null; do + if [[ -f "$lock_dir/pid" ]]; then + owner_pid="$(cat "$lock_dir/pid" 2>/dev/null || true)" + if [[ -n "$owner_pid" ]] && ! kill -0 "$owner_pid" >/dev/null 2>&1; then + printf 'warn: Removing stale Parallels build lock\n' >&2 + rm -rf "$lock_dir" + continue + fi + fi + sleep 1 + done + printf '%s\n' "$$" >"$lock_dir/pid" +} + +parallels_package_release_build_lock() { + local lock_dir="$1" + if [[ -d "$lock_dir" ]]; then + rm -rf "$lock_dir" + fi +} + +parallels_package_run_with_build_lock() { + local lock_dir="$1" + local rc + shift + parallels_package_acquire_build_lock "$lock_dir" + set +e + "$@" + rc=$? 
+ set -e + parallels_package_release_build_lock "$lock_dir" + return "$rc" +} + +parallels_package_write_dist_inventory() { + node --import tsx scripts/write-npm-update-compat-sidecars.ts + node --import tsx --input-type=module --eval \ + 'import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());' +} + +parallels_package_assert_no_generated_drift() { + local drift + drift="$(git status --porcelain -- src/canvas-host/a2ui/.bundle.hash 2>/dev/null || true)" + if [[ -z "$drift" ]]; then + return 0 + fi + printf 'error: generated file drift after build; commit or revert before Parallels packaging:\n%s\n' "$drift" >&2 + return 1 +} + +parallels_log_progress_extract() { + local python_bin="$1" + local log_path="$2" + "$python_bin" - "$log_path" <<'PY' +import pathlib +import sys + +path = pathlib.Path(sys.argv[1]) +if not path.exists(): + print("") + raise SystemExit(0) + +text = path.read_text(encoding="utf-8", errors="replace") +lines = [line.strip() for line in text.splitlines() if line.strip()] + +for line in reversed(lines): + if line.startswith("==> "): + print(line[4:].strip()) + raise SystemExit(0) + +for line in reversed(lines): + if line.startswith("warn:") or line.startswith("error:"): + print(line) + raise SystemExit(0) + +if lines: + print(lines[-1][:240]) +else: + print("") +PY +} + +parallels_child_job_running() { + local target="$1" + local owner="${2:-}" + local ppid + kill -0 "$target" >/dev/null 2>&1 || return 1 + if [[ -z "$owner" ]]; then + return 0 + fi + ppid="$(ps -o ppid= -p "$target" 2>/dev/null | tr -d '[:space:]')" + [[ "$ppid" == "$owner" ]] +} + +parallels_monitor_jobs_progress() { + local group="$1" + local interval_s="$2" + local stale_s="$3" + local python_bin="$4" + local owner_pid="$5" + shift 5 + + local labels=() + local pids=() + local logs=() + local last_progress=() + local last_print=() + local i summary now running + + while [[ $# -gt 0 ]]; do + 
labels+=("$1") + pids+=("$2") + logs+=("$3") + last_progress+=("") + last_print+=(0) + shift 3 + done + + printf '==> %s progress; run dir: %s\n' "$group" "${RUN_DIR:-unknown}" + + while :; do + running=0 + now=$SECONDS + for ((i = 0; i < ${#pids[@]}; i++)); do + if ! parallels_child_job_running "${pids[$i]}" "$owner_pid"; then + continue + fi + running=1 + summary="$(parallels_log_progress_extract "$python_bin" "${logs[$i]}")" + [[ -n "$summary" ]] || summary="waiting for first log line" + if [[ "${last_progress[i]}" != "$summary" ]] || (( now - last_print[i] >= stale_s )); then + printf '==> %s %s: %s\n' "$group" "${labels[$i]}" "$summary" + last_progress[i]="$summary" + last_print[i]=$now + fi + done + (( running )) || break + sleep "$interval_s" + done +} diff --git a/scripts/e2e/parallels-linux-smoke.sh b/scripts/e2e/parallels-linux-smoke.sh index 816785728b9..9460aa3961f 100644 --- a/scripts/e2e/parallels-linux-smoke.sh +++ b/scripts/e2e/parallels-linux-smoke.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash set -euo pipefail +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +source "$ROOT_DIR/scripts/e2e/lib/parallels-package-common.sh" + VM_NAME="Ubuntu 24.04.3 ARM64" VM_NAME_EXPLICIT=0 SNAPSHOT_HINT="fresh" @@ -37,6 +40,7 @@ TIMEOUT_VERIFY_S=90 TIMEOUT_ONBOARD_S=180 TIMEOUT_AGENT_S=180 TIMEOUT_GATEWAY_S=240 +PHASE_STALE_WARN_S=60 FRESH_MAIN_STATUS="skip" FRESH_MAIN_VERSION="skip" @@ -462,16 +466,7 @@ resolve_latest_version() { } current_build_commit() { - python3 - <<'PY' -import json -import pathlib - -path = pathlib.Path("dist/build-info.json") -if not path.exists(): - print("") -else: - print(json.loads(path.read_text()).get("commit", "")) -PY + parallels_package_current_build_commit } source_tree_dirty_for_build() { @@ -479,46 +474,50 @@ source_tree_dirty_for_build() { } acquire_build_lock() { - local owner_pid="" - while ! 
mkdir "$BUILD_LOCK_DIR" 2>/dev/null; do - if [[ -f "$BUILD_LOCK_DIR/pid" ]]; then - owner_pid="$(cat "$BUILD_LOCK_DIR/pid" 2>/dev/null || true)" - if [[ -n "$owner_pid" ]] && ! kill -0 "$owner_pid" >/dev/null 2>&1; then - warn "Removing stale Parallels build lock" - rm -rf "$BUILD_LOCK_DIR" - continue - fi - fi - sleep 1 - done - printf '%s\n' "$$" >"$BUILD_LOCK_DIR/pid" + parallels_package_acquire_build_lock "$BUILD_LOCK_DIR" } release_build_lock() { - if [[ -d "$BUILD_LOCK_DIR" ]]; then - rm -rf "$BUILD_LOCK_DIR" - fi + parallels_package_release_build_lock "$BUILD_LOCK_DIR" } ensure_current_build() { - local head build_commit - acquire_build_lock + local head build_commit rc lock_owned + lock_owned=0 + if [[ "${OPENCLAW_PARALLELS_BUILD_LOCK_HELD:-0}" != "1" ]]; then + acquire_build_lock + lock_owned=1 + fi head="$(git rev-parse HEAD)" build_commit="$(current_build_commit)" if [[ "$build_commit" == "$head" ]] && ! source_tree_dirty_for_build; then - release_build_lock + if [[ "$lock_owned" -eq 1 ]]; then + release_build_lock + fi return fi say "Build dist for current head" + set +e pnpm build + rc=$? + if [[ $rc -eq 0 ]]; then + parallels_package_assert_no_generated_drift + rc=$? 
+ fi build_commit="$(current_build_commit)" - release_build_lock - [[ "$build_commit" == "$head" ]] || die "dist/build-info.json still does not match HEAD after build" + set -e + if [[ "$lock_owned" -eq 1 ]]; then + release_build_lock + fi + [[ $rc -eq 0 ]] || return "$rc" + if [[ "$build_commit" != "$head" ]]; then + warn "dist/build-info.json still does not match HEAD after build" + return 1 + fi } write_package_dist_inventory() { - node --import tsx --input-type=module --eval \ - 'import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());' + parallels_package_write_dist_inventory } extract_package_version_from_tgz() { @@ -526,7 +525,7 @@ extract_package_version_from_tgz() { } pack_main_tgz() { - local short_head pkg packed_commit + local short_head pkg packed_commit rc if [[ -n "$TARGET_PACKAGE_SPEC" ]]; then say "Pack target package tgz: $TARGET_PACKAGE_SPEC" pkg="$( @@ -540,13 +539,21 @@ pack_main_tgz() { return fi say "Pack current main tgz" - ensure_current_build - write_package_dist_inventory - short_head="$(git rev-parse --short HEAD)" - pkg="$( - npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \ - | python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])' - )" + acquire_build_lock + set +e + { + OPENCLAW_PARALLELS_BUILD_LOCK_HELD=1 ensure_current_build && + write_package_dist_inventory && + short_head="$(git rev-parse --short HEAD)" && + pkg="$( + npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \ + | python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])' + )" + } + rc=$? 
+ set -e + release_build_lock + [[ $rc -eq 0 ]] || return "$rc" MAIN_TGZ_PATH="$MAIN_TGZ_DIR/openclaw-main-$short_head.tgz" cp "$MAIN_TGZ_DIR/$pkg" "$MAIN_TGZ_PATH" packed_commit="$(extract_package_build_commit_from_tgz "$MAIN_TGZ_PATH")" @@ -780,10 +787,11 @@ phase_run() { local timeout_s="$2" shift 2 - local log_path pid start rc timed_out + local log_path pid start rc timed_out next_warn summary log_path="$(phase_log_path "$phase_id")" say "$phase_id" start=$SECONDS + next_warn=$((start + PHASE_STALE_WARN_S)) timed_out=0 ( @@ -792,6 +800,12 @@ phase_run() { pid=$! while kill -0 "$pid" >/dev/null 2>&1; do + if (( SECONDS >= next_warn )); then + summary="$(parallels_log_progress_extract python3 "$log_path")" + [[ -n "$summary" ]] || summary="waiting for first log line" + warn "$phase_id still running after $((SECONDS - start))s: $summary" + next_warn=$((SECONDS + PHASE_STALE_WARN_S)) + fi if (( SECONDS - start >= timeout_s )); then timed_out=1 kill "$pid" >/dev/null 2>&1 || true diff --git a/scripts/e2e/parallels-macos-smoke.sh b/scripts/e2e/parallels-macos-smoke.sh index a35cd321a36..4648e0a326e 100644 --- a/scripts/e2e/parallels-macos-smoke.sh +++ b/scripts/e2e/parallels-macos-smoke.sh @@ -3,6 +3,7 @@ set -euo pipefail ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" source "$ROOT_DIR/scripts/e2e/lib/parallels-macos-common.sh" +source "$ROOT_DIR/scripts/e2e/lib/parallels-package-common.sh" VM_NAME="macOS Tahoe" SNAPSHOT_HINT="macOS 26.3.1 latest" @@ -57,6 +58,7 @@ TIMEOUT_DASHBOARD_S=180 TIMEOUT_SNAPSHOT_S=360 TIMEOUT_CURRENT_USER_PRLCTL_S=45 TIMEOUT_DISCORD_S=180 +PHASE_STALE_WARN_S=60 FRESH_MAIN_VERSION="skip" LATEST_INSTALLED_VERSION="skip" @@ -1135,7 +1137,7 @@ extract_package_build_commit_from_tgz() { } pack_main_tgz() { - local short_head pkg packed_commit + local short_head pkg packed_commit rc if target_package_installs_directly; then say "Use direct guest install for target package spec: $TARGET_PACKAGE_SPEC" TARGET_EXPECT_VERSION="$(npm view "$TARGET_PACKAGE_SPEC" version --userconfig "$(mktemp)")" @@ -1155,14 +1157,22 @@ pack_main_tgz() { return fi say "Pack current main tgz" - ensure_current_build - write_package_dist_inventory - stage_pack_runtime_deps - short_head="$(git rev-parse --short HEAD)" - pkg="$( - npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \ - | python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])' - )" + acquire_build_lock + set +e + { + OPENCLAW_PARALLELS_BUILD_LOCK_HELD=1 ensure_current_build && + write_package_dist_inventory && + stage_pack_runtime_deps && + short_head="$(git rev-parse --short HEAD)" && + pkg="$( + npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \ + | python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])' + )" + } + rc=$? 
+ set -e + release_build_lock + [[ $rc -eq 0 ]] || return "$rc" MAIN_TGZ_PATH="$MAIN_TGZ_DIR/openclaw-main-$short_head.tgz" cp "$MAIN_TGZ_DIR/$pkg" "$MAIN_TGZ_PATH" packed_commit="$(extract_package_build_commit_from_tgz "$MAIN_TGZ_PATH")" @@ -1182,16 +1192,7 @@ verify_target_version() { } current_build_commit() { - python3 - <<'PY' -import json -import pathlib - -path = pathlib.Path("dist/build-info.json") -if not path.exists(): - print("") -else: - print(json.loads(path.read_text()).get("commit", "")) -PY + parallels_package_current_build_commit } current_control_ui_ready() { @@ -1199,49 +1200,59 @@ current_control_ui_ready() { } acquire_build_lock() { - local owner_pid="" - while ! mkdir "$BUILD_LOCK_DIR" 2>/dev/null; do - if [[ -f "$BUILD_LOCK_DIR/pid" ]]; then - owner_pid="$(cat "$BUILD_LOCK_DIR/pid" 2>/dev/null || true)" - if [[ -n "$owner_pid" ]] && ! kill -0 "$owner_pid" >/dev/null 2>&1; then - warn "Removing stale Parallels build lock" - rm -rf "$BUILD_LOCK_DIR" - continue - fi - fi - sleep 1 - done - printf '%s\n' "$$" >"$BUILD_LOCK_DIR/pid" + parallels_package_acquire_build_lock "$BUILD_LOCK_DIR" } release_build_lock() { - if [[ -d "$BUILD_LOCK_DIR" ]]; then - rm -rf "$BUILD_LOCK_DIR" - fi + parallels_package_release_build_lock "$BUILD_LOCK_DIR" } ensure_current_build() { - local head build_commit - acquire_build_lock + local head build_commit rc lock_owned + lock_owned=0 + if [[ "${OPENCLAW_PARALLELS_BUILD_LOCK_HELD:-0}" != "1" ]]; then + acquire_build_lock + lock_owned=1 + fi head="$(git rev-parse HEAD)" build_commit="$(current_build_commit)" if [[ "$build_commit" == "$head" ]] && current_control_ui_ready; then - release_build_lock + if [[ "$lock_owned" -eq 1 ]]; then + release_build_lock + fi return fi say "Build dist for current head" + set +e pnpm build - say "Build Control UI for current head" - pnpm ui:build + rc=$? + if [[ $rc -eq 0 ]]; then + parallels_package_assert_no_generated_drift + rc=$? 
+ fi + if [[ $rc -eq 0 ]]; then + say "Build Control UI for current head" + pnpm ui:build + rc=$? + fi build_commit="$(current_build_commit)" - release_build_lock - [[ "$build_commit" == "$head" ]] || die "dist/build-info.json still does not match HEAD after build" - current_control_ui_ready || die "dist/control-ui/index.html missing after ui build" + set -e + if [[ "$lock_owned" -eq 1 ]]; then + release_build_lock + fi + [[ $rc -eq 0 ]] || return "$rc" + if [[ "$build_commit" != "$head" ]]; then + warn "dist/build-info.json still does not match HEAD after build" + return 1 + fi + if ! current_control_ui_ready; then + warn "dist/control-ui/index.html missing after ui build" + return 1 + fi } write_package_dist_inventory() { - node --import tsx --input-type=module --eval \ - 'import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());' + parallels_package_write_dist_inventory } stage_pack_runtime_deps() { @@ -1721,10 +1732,11 @@ phase_run() { local timeout_s="$2" shift 2 - local log_path pid start rc timed_out + local log_path pid start rc timed_out next_warn summary log_path="$(phase_log_path "$phase_id")" say "$phase_id" start=$SECONDS + next_warn=$((start + PHASE_STALE_WARN_S)) timed_out=0 ( @@ -1733,6 +1745,12 @@ phase_run() { pid=$! 
while child_job_running "$pid"; do + if (( SECONDS >= next_warn )); then + summary="$(parallels_log_progress_extract python3 "$log_path")" + [[ -n "$summary" ]] || summary="waiting for first log line" + warn "$phase_id still running after $((SECONDS - start))s: $summary" + next_warn=$((SECONDS + PHASE_STALE_WARN_S)) + fi if (( SECONDS - start >= timeout_s )); then timed_out=1 kill "$pid" >/dev/null 2>&1 || true diff --git a/scripts/e2e/parallels-npm-update-smoke.sh b/scripts/e2e/parallels-npm-update-smoke.sh index c48d2a69a30..991b95b32eb 100755 --- a/scripts/e2e/parallels-npm-update-smoke.sh +++ b/scripts/e2e/parallels-npm-update-smoke.sh @@ -3,6 +3,7 @@ set -euo pipefail ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" source "$ROOT_DIR/scripts/e2e/lib/parallels-macos-common.sh" +source "$ROOT_DIR/scripts/e2e/lib/parallels-package-common.sh" MACOS_VM="macOS Tahoe" WINDOWS_VM="Windows 11" @@ -19,6 +20,7 @@ JSON_OUTPUT=0 RUN_DIR="$(mktemp -d /tmp/openclaw-parallels-npm-update.XXXXXX)" MAIN_TGZ_DIR="$(mktemp -d)" MAIN_TGZ_PATH="" +BUILD_LOCK_DIR="${TMPDIR:-/tmp}/openclaw-parallels-build.lock" WINDOWS_UPDATE_SCRIPT_PATH="" SERVER_PID="" HOST_IP="" @@ -31,7 +33,7 @@ UPDATE_EXPECTED_NEEDLE="" API_KEY_VALUE="" PROGRESS_INTERVAL_S=15 PROGRESS_STALE_S=60 -TIMEOUT_UPDATE_S="${OPENCLAW_PARALLELS_NPM_UPDATE_TIMEOUT_S:-900}" +TIMEOUT_UPDATE_S="${OPENCLAW_PARALLELS_NPM_UPDATE_TIMEOUT_S:-1200}" TIMEOUT_UPDATE_POLL_GRACE_S=60 child_job_running() { @@ -328,26 +330,53 @@ sock.close() PY } +current_build_commit() { + parallels_package_current_build_commit +} + +source_tree_dirty_for_build() { + [[ -n "$(git status --porcelain -- src ui packages extensions package.json pnpm-lock.yaml 'tsconfig*.json' 2>/dev/null)" ]] +} + ensure_current_build() { + local build_commit head rc + head="$(git rev-parse HEAD)" + build_commit="$(current_build_commit)" + if [[ "$build_commit" == "$head" ]] && ! 
source_tree_dirty_for_build; then + return 0 + fi say "Build dist for current head" pnpm build + rc=$? + if [[ $rc -eq 0 ]]; then + parallels_package_assert_no_generated_drift + rc=$? + fi + return "$rc" } write_package_dist_inventory() { - node --import tsx --input-type=module --eval \ - 'import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());' + parallels_package_write_dist_inventory } pack_main_tgz() { - local pkg + local pkg rc CURRENT_HEAD="$(git rev-parse HEAD)" CURRENT_HEAD_SHORT="$(git rev-parse --short=7 HEAD)" - ensure_current_build - write_package_dist_inventory - pkg="$( - npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \ - | "$PYTHON_BIN" -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])' - )" + parallels_package_acquire_build_lock "$BUILD_LOCK_DIR" + set +e + { + ensure_current_build && + write_package_dist_inventory && + pkg="$( + npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \ + | "$PYTHON_BIN" -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])' + )" + } + rc=$? 
+ set -e + parallels_package_release_build_lock "$BUILD_LOCK_DIR" + [[ $rc -eq 0 ]] || return "$rc" MAIN_TGZ_PATH="$MAIN_TGZ_DIR/openclaw-main-$CURRENT_HEAD_SHORT.tgz" cp "$MAIN_TGZ_DIR/$pkg" "$MAIN_TGZ_PATH" } @@ -464,20 +493,36 @@ function Wait-GatewayRpcReady { for ($attempt = 1; $attempt -le $Attempts; $attempt++) { Write-ProgressLog "update.gateway-status.attempt-$attempt" try { - Invoke-Logged 'openclaw gateway status' { & $OpenClawPath gateway status --deep --require-rpc } - return + $statusOutput = Invoke-CaptureLogged 'openclaw gateway status' { & $OpenClawPath gateway status --deep --require-rpc } + if ($statusOutput -match 'Read probe:\s*failed') { + throw 'gateway status returned without RPC read readiness' + } + return $true } catch { if ($attempt -ge $Attempts) { - throw + return $false } Write-ProgressLog "update.gateway-status.retry-$attempt" Start-Sleep -Seconds $SleepSeconds } } + return $false +} + +function Stop-GatewayScheduledTaskIfPresent { + $previousNativeErrorPreference = $PSNativeCommandUseErrorActionPreference + try { + $PSNativeCommandUseErrorActionPreference = $false + schtasks /End /TN 'OpenClaw Gateway' 2>$null | Out-Null + } catch { + } finally { + $PSNativeCommandUseErrorActionPreference = $previousNativeErrorPreference + } } function Stop-OpenClawGatewayProcesses { Write-ProgressLog 'update.stop-old-gateway' + Stop-GatewayScheduledTaskIfPresent $patterns = @( 'openclaw-gateway', 'openclaw.*gateway --port 18789', @@ -508,7 +553,112 @@ function Stop-OpenClawGatewayProcesses { ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue } - Start-Sleep -Seconds 2 + for ($attempt = 1; $attempt -le 20; $attempt++) { + $listeners = Get-NetTCPConnection -LocalPort 18789 -State Listen -ErrorAction SilentlyContinue + if (-not $listeners) { + return + } + $listeners | + ForEach-Object { + Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue + } + Start-Sleep -Seconds 1 + } + $remaining = 
Get-NetTCPConnection -LocalPort 18789 -State Listen -ErrorAction SilentlyContinue + if ($remaining) { + $pids = ($remaining | Select-Object -ExpandProperty OwningProcess -Unique) -join ', ' + throw "gateway listener still active on port 18789 after stop attempts: $pids" + } +} + +function Stop-OpenClawUpdateProcesses { + Write-ProgressLog 'update.stop-stale-update' + $patterns = @( + 'openclaw.* update --tag ', + 'openclaw.* completion --write-state' + ) + Get-CimInstance Win32_Process -ErrorAction SilentlyContinue | + Where-Object { + $commandLine = $_.CommandLine + if (-not $commandLine) { + $false + } else { + $matched = $false + foreach ($pattern in $patterns) { + if ($commandLine -match $pattern) { + $matched = $true + break + } + } + $matched + } + } | + Sort-Object ParentProcessId -Descending | + ForEach-Object { + Stop-Process -Id $_.ProcessId -Force -ErrorAction SilentlyContinue + } +} + +function Invoke-OpenClawUpdateWithTimeout { + param( + [Parameter(Mandatory = $true)][string]$OpenClawPath, + [Parameter(Mandatory = $true)][string]$UpdateTarget, + [int]$TimeoutSeconds = 600 + ) + + $updateJob = Start-Job -ScriptBlock { + param([string]$Path, [string]$Target) + $output = & $Path update --tag $Target --yes --json *>&1 + [pscustomobject]@{ + ExitCode = $LASTEXITCODE + Output = ($output | Out-String).Trim() + } + } -ArgumentList $OpenClawPath, $UpdateTarget + + $completed = Wait-Job $updateJob -Timeout $TimeoutSeconds + if ($null -ne $completed) { + $result = Receive-Job $updateJob + if ($null -ne $result.Output -and $result.Output.Length -gt 0) { + $result.Output | Tee-Object -FilePath $LogPath -Append | Out-Null + } + Remove-Job $updateJob -Force -ErrorAction SilentlyContinue + if ($result.ExitCode -ne 0) { + throw "openclaw update failed with exit code $($result.ExitCode)" + } + return + } + + Stop-Job $updateJob -ErrorAction SilentlyContinue + Remove-Job $updateJob -Force -ErrorAction SilentlyContinue + Write-ProgressLog 'update.openclaw-update.timeout' 
+ 'openclaw update timed out after package install window; killing stale update/completion processes and verifying installed version' | Tee-Object -FilePath $LogPath -Append | Out-Null + Stop-OpenClawUpdateProcesses +} + +function Start-GatewayRunFallback { + param( + [Parameter(Mandatory = $true)][string]$OpenClawPath + ) + + Write-ProgressLog 'update.gateway-run-fallback' + Stop-OpenClawGatewayProcesses + $entry = Join-Path $env:APPDATA 'npm\node_modules\openclaw\dist\index.js' + if (-not (Test-Path $entry)) { + throw "openclaw dist entry missing: $entry" + } + $node = (Get-Command node.exe -ErrorAction Stop).Source + $stdout = Join-Path $env:TEMP 'openclaw-parallels-npm-update-gateway.log' + $stderr = Join-Path $env:TEMP 'openclaw-parallels-npm-update-gateway.err.log' + Start-Process -FilePath $node -ArgumentList @($entry, 'gateway', 'run', '--bind', 'loopback', '--port', '18789', '--force') -WindowStyle Hidden -RedirectStandardOutput $stdout -RedirectStandardError $stderr | Out-Null + if (-not (Wait-GatewayRpcReady -OpenClawPath $OpenClawPath -Attempts 20 -SleepSeconds 3)) { + if (Test-Path $stdout) { + Get-Content $stdout -Tail 80 | Tee-Object -FilePath $LogPath -Append | Out-Null + } + if (Test-Path $stderr) { + Get-Content $stderr -Tail 80 | Tee-Object -FilePath $LogPath -Append | Out-Null + } + throw 'gateway did not become RPC-ready after run fallback' + } } function Complete-WorkspaceSetup { @@ -568,17 +718,15 @@ function Restart-GatewayWithRecovery { Remove-Job $restartJob -Force -ErrorAction SilentlyContinue Write-ProgressLog 'update.gateway-status' - try { - Wait-GatewayRpcReady -OpenClawPath $OpenClawPath + if (Wait-GatewayRpcReady -OpenClawPath $OpenClawPath) { return - } catch { - if (-not $restartFailed) { - throw - } - Write-ProgressLog 'update.gateway-start-recover' - Invoke-Logged 'openclaw gateway start' { & $OpenClawPath gateway start } - Write-ProgressLog 'update.gateway-status-recover' - Wait-GatewayRpcReady -OpenClawPath $OpenClawPath + } + 
Write-ProgressLog 'update.gateway-start-recover' + Stop-OpenClawGatewayProcesses + Invoke-Logged 'openclaw gateway start' { & $OpenClawPath gateway start } + Write-ProgressLog 'update.gateway-status-recover' + if (-not (Wait-GatewayRpcReady -OpenClawPath $OpenClawPath)) { + Start-GatewayRunFallback -OpenClawPath $OpenClawPath } } @@ -597,7 +745,7 @@ try { $openclaw = Join-Path $env:APPDATA 'npm\openclaw.cmd' Stop-OpenClawGatewayProcesses Write-ProgressLog 'update.openclaw-update' - Invoke-Logged 'openclaw update' { & $openclaw update --tag $UpdateTarget --yes --json } + Invoke-OpenClawUpdateWithTimeout -OpenClawPath $openclaw -UpdateTarget $UpdateTarget Write-ProgressLog 'update.verify-version' $version = Invoke-CaptureLogged 'openclaw --version' { & $openclaw --version } if ($ExpectedNeedle -and $version -notmatch [regex]::Escape($ExpectedNeedle)) { @@ -617,7 +765,7 @@ try { Restart-GatewayWithRecovery -OpenClawPath $openclaw Complete-WorkspaceSetup Write-ProgressLog 'update.agent-turn' - Invoke-CaptureLogged 'openclaw agent' { & $openclaw agent --agent main --session-id $SessionId --message 'Reply with exact ASCII text OK only.' --json } | Out-Null + Invoke-CaptureLogged 'openclaw agent' { & $openclaw agent --local --agent main --session-id $SessionId --message 'Reply with exact ASCII text OK only.' 
--json } | Out-Null $exitCode = $LASTEXITCODE if ($null -eq $exitCode) { $exitCode = 0 @@ -791,20 +939,62 @@ Write-Output \$version if ('$expected_needle' -and \$version -notmatch [regex]::Escape('$expected_needle')) { throw "version mismatch after transport loss: expected substring $expected_needle" } +function Test-GatewayWritable { + param([string]\$Path) + \$statusOutput = & \$Path gateway status --deep --require-rpc *>&1 + if (\$null -ne \$statusOutput) { + \$statusOutput | Out-Host # host only; keeps the boolean return clean + } + if (\$LASTEXITCODE -ne 0) { + return \$false + } + \$statusText = (\$statusOutput | Out-String) + return (\$statusText -notmatch 'Read probe:\s*failed') +} +function Stop-GatewayListeners { + \$previousNativeErrorPreference = \$PSNativeCommandUseErrorActionPreference + try { + \$PSNativeCommandUseErrorActionPreference = \$false + schtasks /End /TN 'OpenClaw Gateway' 2>\$null | Out-Null + } catch { + } finally { + \$PSNativeCommandUseErrorActionPreference = \$previousNativeErrorPreference + } + Get-CimInstance Win32_Process -ErrorAction SilentlyContinue | + Where-Object { + \$_.CommandLine -and ( + \$_.CommandLine -match 'openclaw.*gateway --port 18789' -or + \$_.CommandLine -match 'openclaw.*gateway run' -or + \$_.CommandLine -match 'dist\\\\index\\.js gateway --port 18789' + ) + } | + ForEach-Object { + Stop-Process -Id \$_.ProcessId -Force -ErrorAction SilentlyContinue + } + for (\$i = 0; \$i -lt 20; \$i++) { + \$listeners = Get-NetTCPConnection -LocalPort 18789 -State Listen -ErrorAction SilentlyContinue + if (-not \$listeners) { + return + } + \$listeners | ForEach-Object { + Stop-Process -Id \$_.OwningProcess -Force -ErrorAction SilentlyContinue + } + Start-Sleep -Seconds 1 + } +} \$gatewayReady = \$false for (\$i = 0; \$i -lt 6; \$i++) { - & \$openclaw gateway status --deep --require-rpc - if (\$LASTEXITCODE -eq 0) { + if (Test-GatewayWritable \$openclaw) { \$gatewayReady = \$true break } Start-Sleep -Seconds 2 } if (-not \$gatewayReady) { + Stop-GatewayListeners &
\$openclaw gateway restart for (\$i = 0; \$i -lt 6; \$i++) { - & \$openclaw gateway status --deep --require-rpc - if (\$LASTEXITCODE -eq 0) { + if (Test-GatewayWritable \$openclaw) { \$gatewayReady = \$true break } @@ -812,10 +1002,25 @@ if (-not \$gatewayReady) { } } if (-not \$gatewayReady) { + Stop-GatewayListeners & \$openclaw gateway start for (\$i = 0; \$i -lt 6; \$i++) { - & \$openclaw gateway status --deep --require-rpc - if (\$LASTEXITCODE -eq 0) { + if (Test-GatewayWritable \$openclaw) { + \$gatewayReady = \$true + break + } + Start-Sleep -Seconds 2 + } +} +if (-not \$gatewayReady) { + Stop-GatewayListeners + \$entry = Join-Path \$env:APPDATA 'npm\\node_modules\\openclaw\\dist\\index.js' + \$node = (Get-Command node.exe -ErrorAction Stop).Source + \$stdout = Join-Path \$env:TEMP 'openclaw-parallels-npm-update-recover-gateway.log' + \$stderr = Join-Path \$env:TEMP 'openclaw-parallels-npm-update-recover-gateway.err.log' + Start-Process -FilePath \$node -ArgumentList @(\$entry, 'gateway', 'run', '--bind', 'loopback', '--port', '18789', '--force') -WindowStyle Hidden -RedirectStandardOutput \$stdout -RedirectStandardError \$stderr | Out-Null + for (\$i = 0; \$i -lt 20; \$i++) { + if (Test-GatewayWritable \$openclaw) { \$gatewayReady = \$true break } @@ -848,7 +1053,7 @@ New-Item -ItemType Directory -Path \$stateDir -Force | Out-Null } '@ | Set-Content -Path (Join-Path \$stateDir 'workspace-state.json') -Encoding UTF8 Remove-Item (Join-Path \$workspace 'BOOTSTRAP.md') -Force -ErrorAction SilentlyContinue -& \$openclaw agent --agent main --session-id 'parallels-npm-update-windows-transport-recovery-$expected_needle' --message 'Reply with exact ASCII text OK only.' --json +& \$openclaw agent --local --agent main --session-id 'parallels-npm-update-windows-transport-recovery-$expected_needle' --message 'Reply with exact ASCII text OK only.' --json EOF )" local rc=$? 
@@ -883,37 +1088,6 @@ stop_timeout_guard() { wait "$pid" 2>/dev/null || true } -extract_log_progress() { - local log_path="$1" - "$PYTHON_BIN" - "$log_path" <<'PY' -import pathlib -import sys - -path = pathlib.Path(sys.argv[1]) -if not path.exists(): - print("") - raise SystemExit(0) - -text = path.read_text(encoding="utf-8", errors="replace") -lines = [line.strip() for line in text.splitlines() if line.strip()] - -for line in reversed(lines): - if line.startswith("==> "): - print(line[4:].strip()) - raise SystemExit(0) - -for line in reversed(lines): - if line.startswith("warn:") or line.startswith("error:"): - print(line) - raise SystemExit(0) - -if lines: - print(lines[-1][:240]) -else: - print("") -PY -} - dump_log_tail() { local label="$1" local log_path="$2" @@ -925,44 +1099,7 @@ dump_log_tail() { monitor_jobs_progress() { local group="$1" shift - - local labels=() - local pids=() - local logs=() - local last_progress=() - local last_print=() - local i summary now running - - while [[ $# -gt 0 ]]; do - labels+=("$1") - pids+=("$2") - logs+=("$3") - last_progress+=("") - last_print+=(0) - shift 3 - done - - say "$group progress; run dir: $RUN_DIR" - - while :; do - running=0 - now=$SECONDS - for ((i = 0; i < ${#pids[@]}; i++)); do - if ! 
child_job_running "${pids[$i]}"; then - continue - fi - running=1 - summary="$(extract_log_progress "${logs[$i]}")" - [[ -n "$summary" ]] || summary="waiting for first log line" - if [[ "${last_progress[i]}" != "$summary" ]] || (( now - last_print[i] >= PROGRESS_STALE_S )); then - say "$group ${labels[$i]}: $summary" - last_progress[i]="$summary" - last_print[i]=$now - fi - done - (( running )) || break - sleep "$PROGRESS_INTERVAL_S" - done + parallels_monitor_jobs_progress "$group" "$PROGRESS_INTERVAL_S" "$PROGRESS_STALE_S" "$PYTHON_BIN" "$$" "$@" } extract_last_version() { diff --git a/scripts/e2e/parallels-windows-smoke.sh b/scripts/e2e/parallels-windows-smoke.sh index 49720a501df..6d1efb945fe 100644 --- a/scripts/e2e/parallels-windows-smoke.sh +++ b/scripts/e2e/parallels-windows-smoke.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash set -euo pipefail +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +source "$ROOT_DIR/scripts/e2e/lib/parallels-package-common.sh" + VM_NAME="Windows 11" SNAPSHOT_HINT="pre-openclaw-native-e2e-2026-03-12" MODE="both" @@ -41,7 +44,7 @@ BUILD_LOCK_DIR="${TMPDIR:-/tmp}/openclaw-parallels-build.lock" TIMEOUT_SNAPSHOT_S=240 TIMEOUT_GIT_SETUP_S=1200 TIMEOUT_INSTALL_S=420 -TIMEOUT_UPDATE_S="${OPENCLAW_PARALLELS_WINDOWS_UPDATE_TIMEOUT_S:-1800}" +TIMEOUT_UPDATE_S="${OPENCLAW_PARALLELS_WINDOWS_UPDATE_TIMEOUT_S:-1200}" TIMEOUT_UPDATE_POLL_GRACE_S=60 TIMEOUT_VERIFY_S=120 TIMEOUT_ONBOARD_S=600 @@ -49,6 +52,7 @@ TIMEOUT_ONBOARD_PHASE_S=$((TIMEOUT_ONBOARD_S + 120)) # verify_gateway_reachable runs six 30s probes plus short retry sleeps. 
TIMEOUT_GATEWAY_S=420 TIMEOUT_AGENT_S=600 +PHASE_STALE_WARN_S=60 FRESH_MAIN_STATUS="skip" FRESH_MAIN_VERSION="skip" @@ -691,10 +695,11 @@ phase_run() { local timeout_s="$2" shift 2 - local log_path pid start rc timed_out + local log_path pid start rc timed_out next_warn summary log_path="$(phase_log_path "$phase_id")" say "$phase_id" start=$SECONDS + next_warn=$((start + PHASE_STALE_WARN_S)) timed_out=0 ( @@ -703,6 +708,12 @@ phase_run() { pid=$! while child_job_running "$pid"; do + if (( SECONDS >= next_warn )); then + summary="$(parallels_log_progress_extract python3 "$log_path")" + [[ -n "$summary" ]] || summary="waiting for first log line" + warn "$phase_id still running after $((SECONDS - start))s: $summary" + next_warn=$((SECONDS + PHASE_STALE_WARN_S)) + fi if (( SECONDS - start >= timeout_s )); then timed_out=1 kill "$pid" >/dev/null 2>&1 || true @@ -848,16 +859,7 @@ PY } current_build_commit() { - python3 - <<'PY' -import json -import pathlib - -path = pathlib.Path("dist/build-info.json") -if not path.exists(): - print("") -else: - print(json.loads(path.read_text()).get("commit", "")) -PY + parallels_package_current_build_commit } source_tree_dirty_for_build() { @@ -865,46 +867,50 @@ source_tree_dirty_for_build() { } acquire_build_lock() { - local owner_pid="" - while ! mkdir "$BUILD_LOCK_DIR" 2>/dev/null; do - if [[ -f "$BUILD_LOCK_DIR/pid" ]]; then - owner_pid="$(cat "$BUILD_LOCK_DIR/pid" 2>/dev/null || true)" - if [[ -n "$owner_pid" ]] && ! 
kill -0 "$owner_pid" >/dev/null 2>&1; then - warn "Removing stale Parallels build lock" - rm -rf "$BUILD_LOCK_DIR" - continue - fi - fi - sleep 1 - done - printf '%s\n' "$$" >"$BUILD_LOCK_DIR/pid" + parallels_package_acquire_build_lock "$BUILD_LOCK_DIR" } release_build_lock() { - if [[ -d "$BUILD_LOCK_DIR" ]]; then - rm -rf "$BUILD_LOCK_DIR" - fi + parallels_package_release_build_lock "$BUILD_LOCK_DIR" } ensure_current_build() { - local head build_commit - acquire_build_lock + local head build_commit rc lock_owned + lock_owned=0 + if [[ "${OPENCLAW_PARALLELS_BUILD_LOCK_HELD:-0}" != "1" ]]; then + acquire_build_lock + lock_owned=1 + fi head="$(git rev-parse HEAD)" build_commit="$(current_build_commit)" if [[ "$build_commit" == "$head" ]] && ! source_tree_dirty_for_build; then - release_build_lock + if [[ "$lock_owned" -eq 1 ]]; then + release_build_lock + fi return fi say "Build dist for current head" + set +e pnpm build + rc=$? + if [[ $rc -eq 0 ]]; then + parallels_package_assert_no_generated_drift + rc=$? 
+ fi build_commit="$(current_build_commit)" - release_build_lock - [[ "$build_commit" == "$head" ]] || die "dist/build-info.json still does not match HEAD after build" + set -e + if [[ "$lock_owned" -eq 1 ]]; then + release_build_lock + fi + [[ $rc -eq 0 ]] || return "$rc" + if [[ "$build_commit" != "$head" ]]; then + warn "dist/build-info.json still does not match HEAD after build" + return 1 + fi } write_package_dist_inventory() { - node --import tsx --input-type=module --eval \ - 'import { writePackageDistInventory } from "./src/infra/package-dist-inventory.ts"; await writePackageDistInventory(process.cwd());' + parallels_package_write_dist_inventory } ensure_guest_git() { @@ -951,7 +957,7 @@ ensure_mingit_zip() { } pack_main_tgz() { - local short_head pkg packed_commit + local short_head pkg packed_commit rc ensure_mingit_zip if [[ -n "$TARGET_PACKAGE_SPEC" ]]; then say "Pack target package tgz: $TARGET_PACKAGE_SPEC" @@ -966,13 +972,21 @@ pack_main_tgz() { return fi say "Pack current main tgz" - ensure_current_build - write_package_dist_inventory - short_head="$(git rev-parse --short HEAD)" - pkg="$( - npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \ - | python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])' - )" + acquire_build_lock + set +e + { + OPENCLAW_PARALLELS_BUILD_LOCK_HELD=1 ensure_current_build && + write_package_dist_inventory && + short_head="$(git rev-parse --short HEAD)" && + pkg="$( + npm pack --ignore-scripts --json --pack-destination "$MAIN_TGZ_DIR" \ + | python3 -c 'import json, sys; data = json.load(sys.stdin); print(data[-1]["filename"])' + )" + } + rc=$? 
+ set -e + release_build_lock + [[ $rc -eq 0 ]] || return "$rc" MAIN_TGZ_PATH="$MAIN_TGZ_DIR/openclaw-main-$short_head.tgz" cp "$MAIN_TGZ_DIR/$pkg" "$MAIN_TGZ_PATH" packed_commit="$(extract_package_build_commit_from_tgz "$MAIN_TGZ_PATH")" diff --git a/src/cli/update-cli.test.ts b/src/cli/update-cli.test.ts index 51823ca55ca..dea7a34fbe1 100644 --- a/src/cli/update-cli.test.ts +++ b/src/cli/update-cli.test.ts @@ -193,6 +193,7 @@ const { defaultRuntime } = await import("../runtime.js"); const { updateCommand, updateStatusCommand, updateWizardCommand } = await import("./update-cli.js"); const updateCliShared = await import("./update-cli/shared.js"); const { resolveGitInstallDir } = updateCliShared; +const { spawnSync } = await import("node:child_process"); type UpdateCliScenario = { name: string; @@ -458,6 +459,19 @@ describe("update-cli", () => { setStdoutTty(false); }); + it("bounds completion cache refresh during update follow-up", async () => { + const root = createCaseDir("openclaw-completion-timeout"); + pathExists.mockResolvedValue(true); + + await updateCliShared.tryWriteCompletionCache(root, false); + + expect(spawnSync).toHaveBeenCalledWith( + expect.any(String), + [path.join(root, "openclaw.mjs"), "completion", "--write-state"], + expect.objectContaining({ timeout: 30_000 }), + ); + }); + it("respawns into the updated package root before running post-update tasks", async () => { const { entrypoints } = setupUpdatedRootRefresh(); diff --git a/src/cli/update-cli/shared.ts b/src/cli/update-cli/shared.ts index 78fd160941a..c1ccd2de67b 100644 --- a/src/cli/update-cli/shared.ts +++ b/src/cli/update-cli/shared.ts @@ -258,6 +258,8 @@ export async function resolveGlobalManager(params: { return byPresence ?? 
"npm"; } +const COMPLETION_CACHE_WRITE_TIMEOUT_MS = 30_000; + export async function tryWriteCompletionCache(root: string, jsonMode: boolean): Promise { const binPath = path.join(root, "openclaw.mjs"); if (!(await pathExists(binPath))) { @@ -268,6 +270,7 @@ export async function tryWriteCompletionCache(root: string, jsonMode: boolean): cwd: root, env: process.env, encoding: "utf-8", + timeout: COMPLETION_CACHE_WRITE_TIMEOUT_MS, }); if (result.error) { diff --git a/src/infra/update-global.ts b/src/infra/update-global.ts index 9032a16ca97..8dd409034ad 100644 --- a/src/infra/update-global.ts +++ b/src/infra/update-global.ts @@ -245,6 +245,9 @@ async function collectInstalledPathErrors(params: { if (actualSet !== null && params.unexpectedMessage) { const expectedSet = new Set(params.expectedFiles); for (const relativePath of params.actualFiles ?? []) { + if (NPM_UPDATE_COMPAT_SIDECAR_PATHS.has(relativePath)) { + continue; + } if (!expectedSet.has(relativePath)) { errors.push(params.unexpectedMessage(relativePath)); }