fix: improve parallels smoke progress

This commit is contained in:
Peter Steinberger
2026-04-02 19:38:07 +01:00
parent d631326c5e
commit be4be5e783
3 changed files with 130 additions and 13 deletions

View File

@@ -29,9 +29,10 @@ Use this skill for Parallels guest workflows and smoke interpretation. Do not lo
- Preferred entrypoint: `pnpm test:parallels:npm-update`
- Flow: fresh snapshot -> install npm package baseline -> smoke -> install current main tgz on the same guest -> smoke again.
- Same-guest update verification should set the default model explicitly to `openai/gpt-5.4` before the agent turn and use a fresh explicit `--session-id` so old session model state does not leak into the check.
- The aggregate npm-update wrapper must resolve the Linux VM with the same Ubuntu fallback policy as `parallels-linux-smoke.sh` before both fresh and update lanes. On Peter's current host, missing `Ubuntu 24.04.3 ARM64` should fall back to `Ubuntu 25.10`.
- The aggregate npm-update wrapper must resolve the Linux VM with the same Ubuntu fallback policy as `parallels-linux-smoke.sh` before both fresh and update lanes. Treat any Ubuntu guest with major version `>= 24` as acceptable when the exact default VM is missing, preferring the closest version match. On Peter's current host today, missing `Ubuntu 24.04.3 ARM64` should fall back to `Ubuntu 25.10`.
- On Windows same-guest update checks, restart the gateway after the npm upgrade before `gateway status` / `agent`; in-place global npm updates can otherwise leave stale hashed `dist/*` module imports alive in the running service.
- For Windows same-guest update checks, prefer the done-file/log-drain PowerShell runner pattern over one long-lived `prlctl exec ... powershell -EncodedCommand ...` transport. The guest can finish successfully while the outer `prlctl exec` still hangs.
- The Windows same-guest update helper should write stage markers to its log before long steps like tgz download and `npm install -g` so the outer progress monitor does not sit on `waiting for first log line` during healthy but quiet installs.
- Linux same-guest update verification should also export `HOME=/root`, pass `OPENAI_API_KEY` via `prlctl exec ... /usr/bin/env`, and use `openclaw agent --local`; the fresh Linux baseline does not rely on persisted gateway credentials.
- The npm-update wrapper now prints per-lane progress from the nested log files. If a lane still looks stuck, inspect the nested logs in `runDir` first (`macos-fresh.log`, `windows-fresh.log`, `linux-fresh.log`, `macos-update.log`, `windows-update.log`, `linux-update.log`) instead of assuming the outer wrapper hung.
- If the wrapper fails a lane, read the auto-dumped tail first, then the full nested lane log under `/tmp/openclaw-parallels-npm-update.*`.
@@ -75,7 +76,7 @@ Use this skill for Parallels guest workflows and smoke interpretation. Do not lo
- Preferred entrypoint: `pnpm test:parallels:linux`
- Use the snapshot closest to fresh `Ubuntu 24.04.3 ARM64`.
- If that exact VM is missing on the host, fall back to the closest Ubuntu guest with a fresh poweroff snapshot. On Peter's host today, that is `Ubuntu 25.10`.
- If that exact VM is missing on the host, any Ubuntu guest with major version `>= 24` is acceptable; prefer the closest versioned Ubuntu guest with a fresh poweroff snapshot. On Peter's host today, that is `Ubuntu 25.10`.
- Use plain `prlctl exec`; `--current-user` is not the right transport on this snapshot.
- Fresh snapshots may be missing `curl`, and `apt-get update` can fail on clock skew. Bootstrap with `apt-get -o Acquire::Check-Date=false update` and install `curl ca-certificates`.
- Fresh `main` tgz smoke still needs the latest-release installer first because the snapshot has no Node or npm before bootstrap.

View File

@@ -228,6 +228,7 @@ resolve_vm_name() {
import difflib
import json
import os
import re
import sys
payload = json.loads(os.environ["PRL_VM_JSON"])
@@ -236,6 +237,18 @@ requested_lower = requested.lower()
explicit = os.environ["VM_NAME_EXPLICIT"] == "1"
names = [str(item.get("name", "")).strip() for item in payload if str(item.get("name", "")).strip()]
def parse_ubuntu_version(name: str) -> "tuple[int, ...] | None":
    """Extract the dotted Ubuntu version that follows "ubuntu" in a VM name.

    The match is case-insensitive and requires whitespace between "ubuntu"
    and the number, e.g. "Ubuntu 24.04.3 ARM64" -> (24, 4, 3).

    Args:
        name: VM display name to inspect.

    Returns:
        The version components as a tuple of ints, or None when the name
        contains no "ubuntu <digits>" fragment.
    """
    # The return annotation is a string on purpose: evaluating
    # `tuple[int, ...] | None` at definition time raises TypeError before
    # Python 3.10, which would break this inline script on hosts whose
    # python3 is still 3.9.
    match = re.search(r"ubuntu\s+(\d+(?:\.\d+)*)", name, re.IGNORECASE)
    if not match:
        return None
    return tuple(int(part) for part in match.group(1).split("."))
def version_distance(version: tuple[int, ...], target: tuple[int, ...]) -> tuple[int, ...]:
    """Return the component-wise absolute gap between two versions.

    The shorter version is treated as if it were padded with trailing zeros,
    so the result always has as many components as the longer input. The
    resulting tuple compares lexicographically, which makes the major
    component dominate when it is used as a sort key.
    """
    size = max(len(version), len(target))
    gaps = []
    for index in range(size):
        # Missing trailing components count as 0.
        left = version[index] if index < len(version) else 0
        right = target[index] if index < len(target) else 0
        gaps.append(abs(left - right))
    return tuple(gaps)
if requested in names:
print(requested)
raise SystemExit(0)
@@ -247,6 +260,27 @@ ubuntu_names = [name for name in names if "ubuntu" in name.lower()]
# Without any Ubuntu guest registered there is nothing to fall back to.
if not ubuntu_names:
    sys.exit(f"default vm not found and no Ubuntu fallback available: {requested}")

# The requested VM name supplies the target version; use (24,) when it has none.
requested_version = parse_ubuntu_version(requested)
if not requested_version:
    requested_version = (24,)

ubuntu_with_versions = [
    (vm_name, parse_ubuntu_version(vm_name)) for vm_name in ubuntu_names
]

# Keep only guests whose parsed major version is at least 24.
ubuntu_ge_24 = [
    entry
    for entry in ubuntu_with_versions
    if entry[1] and entry[1][0] >= 24
]


def _fallback_rank(entry):
    """Sort key: closest version first, then more version components, then name."""
    vm_name, vm_version = entry
    return (
        version_distance(vm_version, requested_version),
        -len(vm_version),
        vm_name.lower(),
    )


if ubuntu_ge_24:
    closest_entry = min(ubuntu_ge_24, key=_fallback_rank)
    best_name = closest_entry[0]
    print(best_name)
    raise SystemExit(0)
best_name = max(
ubuntu_names,
key=lambda name: difflib.SequenceMatcher(None, requested_lower, name.lower()).ratio(),

View File

@@ -141,6 +141,7 @@ resolve_linux_vm_name() {
import difflib
import json
import os
import re
import sys
payload = json.loads(os.environ["PRL_VM_JSON"])
@@ -148,6 +149,18 @@ requested = os.environ["REQUESTED_VM_NAME"].strip()
requested_lower = requested.lower()
names = [str(item.get("name", "")).strip() for item in payload if str(item.get("name", "")).strip()]
def parse_ubuntu_version(name: str) -> "tuple[int, ...] | None":
    """Extract the dotted Ubuntu version that follows "ubuntu" in a VM name.

    The match is case-insensitive and requires whitespace between "ubuntu"
    and the number, e.g. "Ubuntu 24.04.3 ARM64" -> (24, 4, 3).

    Args:
        name: VM display name to inspect.

    Returns:
        The version components as a tuple of ints, or None when the name
        contains no "ubuntu <digits>" fragment.
    """
    # The return annotation is a string on purpose: evaluating
    # `tuple[int, ...] | None` at definition time raises TypeError before
    # Python 3.10, which would break this inline script on hosts whose
    # python3 is still 3.9.
    match = re.search(r"ubuntu\s+(\d+(?:\.\d+)*)", name, re.IGNORECASE)
    if not match:
        return None
    return tuple(int(part) for part in match.group(1).split("."))
def version_distance(version: tuple[int, ...], target: tuple[int, ...]) -> tuple[int, ...]:
    """Return the component-wise absolute gap between two versions.

    The shorter version is treated as if it were padded with trailing zeros,
    so the result always has as many components as the longer input. The
    resulting tuple compares lexicographically, which makes the major
    component dominate when it is used as a sort key.
    """
    size = max(len(version), len(target))
    gaps = []
    for index in range(size):
        # Missing trailing components count as 0.
        left = version[index] if index < len(version) else 0
        right = target[index] if index < len(target) else 0
        gaps.append(abs(left - right))
    return tuple(gaps)
if requested in names:
print(requested)
raise SystemExit(0)
@@ -156,6 +169,27 @@ ubuntu_names = [name for name in names if "ubuntu" in name.lower()]
# Without any Ubuntu guest registered there is nothing to fall back to.
if not ubuntu_names:
    sys.exit(f"default vm not found and no Ubuntu fallback available: {requested}")

# The requested VM name supplies the target version; use (24,) when it has none.
requested_version = parse_ubuntu_version(requested)
if not requested_version:
    requested_version = (24,)

ubuntu_with_versions = [
    (vm_name, parse_ubuntu_version(vm_name)) for vm_name in ubuntu_names
]

# Keep only guests whose parsed major version is at least 24.
ubuntu_ge_24 = [
    entry
    for entry in ubuntu_with_versions
    if entry[1] and entry[1][0] >= 24
]


def _fallback_rank(entry):
    """Sort key: closest version first, then more version components, then name."""
    vm_name, vm_version = entry
    return (
        version_distance(vm_version, requested_version),
        -len(vm_version),
        vm_name.lower(),
    )


if ubuntu_ge_24:
    closest_entry = min(ubuntu_ge_24, key=_fallback_rank)
    best_name = closest_entry[0]
    print(best_name)
    raise SystemExit(0)
best_name = max(
ubuntu_names,
key=lambda name: difflib.SequenceMatcher(None, requested_lower, name.lower()).ratio(),
@@ -221,6 +255,12 @@ param(
$ErrorActionPreference = 'Stop'
$PSNativeCommandUseErrorActionPreference = $false
function Write-ProgressLog {
    <#
    .SYNOPSIS
    Appends a "==> <stage>" marker line to the update log.

    .PARAMETER Stage
    Text placed after the "==> " prefix.

    .NOTES
    Writes to the script-level $LogPath and emits nothing to the pipeline.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [string]$Stage
    )

    $marker = "==> $Stage"
    # Tee-Object appends to the log; Out-Null discards the pass-through copy.
    $marker | Tee-Object -FilePath $LogPath -Append | Out-Null
}
function Invoke-Logged {
param(
[Parameter(Mandatory = $true)][string]$Label,
@@ -284,25 +324,35 @@ try {
# Same-guest update sequence: download the tgz, install it globally, verify the
# version, set the model, restart the gateway, run one agent turn, then write
# the done file. A "==> <stage>" marker is logged ahead of each step.

# Prepend the portable-git directories under %LOCALAPPDATA%\OpenClaw\deps to PATH for this process.
$env:PATH = "$env:LOCALAPPDATA\OpenClaw\deps\portable-git\cmd;$env:LOCALAPPDATA\OpenClaw\deps\portable-git\mingw64\bin;$env:LOCALAPPDATA\OpenClaw\deps\portable-git\usr\bin;$env:PATH"
$tgz = Join-Path $env:TEMP 'openclaw-main-update.tgz'
# Remove leftovers (tgz, log, done marker) before starting; missing files are ignored.
Remove-Item $tgz, $LogPath, $DonePath -Force -ErrorAction SilentlyContinue
Write-ProgressLog 'update.start'
# Expose the provider key under the environment variable named by $ProviderKeyEnv.
Set-Item -Path ('Env:' + $ProviderKeyEnv) -Value $ProviderKey
Write-ProgressLog 'update.download-tgz'
Invoke-Logged 'download current tgz' { curl.exe -fsSL $TgzUrl -o $tgz }
Write-ProgressLog 'update.install-tgz'
Invoke-Logged 'npm install current tgz' { npm.cmd install -g $tgz --no-fund --no-audit }
# Call the CLI through its command shim under %APPDATA%\npm.
$openclaw = Join-Path $env:APPDATA 'npm\openclaw.cmd'
Write-ProgressLog 'update.verify-version'
$version = Invoke-CaptureLogged 'openclaw --version' { & $openclaw --version }
# [regex]::Escape makes -notmatch treat $HeadShort as literal text rather than a pattern.
if ($version -notmatch [regex]::Escape($HeadShort)) {
throw "version mismatch: expected substring $HeadShort"
}
# Record the reported version string in the log as its own marker line.
Write-ProgressLog $version
Write-ProgressLog 'update.set-model'
Invoke-Logged 'openclaw models set' { & $openclaw models set $ModelId }
# Windows can keep the old hashed dist modules alive across in-place global npm upgrades.
# Restart the gateway/service before verifying status or the next agent turn.
Write-ProgressLog 'update.restart-gateway'
Invoke-Logged 'openclaw gateway restart' { & $openclaw gateway restart }
# NOTE(review): fixed 5 second pause, presumably to let the gateway come back up before the status check - confirm.
Start-Sleep -Seconds 5
Write-ProgressLog 'update.gateway-status'
Invoke-Logged 'openclaw gateway status' { & $openclaw gateway status --deep --require-rpc }
Write-ProgressLog 'update.agent-turn'
# The captured agent output is discarded here; only the exit code read below is used.
Invoke-CaptureLogged 'openclaw agent' { & $openclaw agent --agent main --session-id $SessionId --message 'Reply with exact ASCII text OK only.' --json } | Out-Null
# NOTE(review): $LASTEXITCODE is presumably the agent command's exit code, unless
# Invoke-CaptureLogged runs another native command after it - confirm.
$exitCode = $LASTEXITCODE
# Treat an unset $LASTEXITCODE as success.
if ($null -eq $exitCode) {
$exitCode = 0
}
Write-ProgressLog 'update.done'
# The done file holds the exit code as text.
Set-Content -Path $DonePath -Value ([string]$exitCode)
exit $exitCode
} catch {
@@ -510,11 +560,14 @@ run_windows_script_via_log() {
local model_id="$5"
local provider_key_env="$6"
local provider_key="$7"
local runner_name log_name done_name done_status launcher_state
local runner_name log_name done_name done_status launcher_state guest_log
local start_seconds poll_deadline startup_checked poll_rc state_rc log_rc
local log_state_path
runner_name="openclaw-update-$RANDOM-$RANDOM.ps1"
log_name="openclaw-update-$RANDOM-$RANDOM.log"
done_name="openclaw-update-$RANDOM-$RANDOM.done"
log_state_path="$(mktemp "${TMPDIR:-/tmp}/openclaw-update-log-state.XXXXXX")"
: >"$log_state_path"
start_seconds="$SECONDS"
poll_deadline=$((SECONDS + 900))
startup_checked=0
@@ -541,6 +594,34 @@ Start-Process powershell.exe -ArgumentList @(
EOF
)"
stream_windows_update_log() {
  # Print whatever the guest-side update log has gained since the previous poll.
  #
  # Fetches the whole guest log with guest_powershell_poll, then lets the
  # embedded Python helper compare it with the snapshot stored in
  # "$log_state_path" and write only the new suffix to stdout.
  #
  # Uses the enclosing function's locals (bash dynamic scoping):
  #   log_name, log_state_path, guest_log, log_rc.
  # Returns:
  #   the poll's exit status when the poll fails;
  #   0 when the log is still missing or empty;
  #   otherwise the Python helper's exit status.
  #
  # The poll runs as an `if` condition so its failure is captured without
  # toggling errexit. Turning `set -e` back on in here and then returning
  # non-zero aborts the whole script at a plain call site, which defeats a
  # caller that disabled errexit in order to warn and retry.
  if guest_log="$(
    guest_powershell_poll 20 "\$log = Join-Path \$env:TEMP '$log_name'; if (Test-Path \$log) { Get-Content \$log }"
  )"; then
    log_rc=0
  else
    log_rc=$?
  fi
  if [[ $log_rc -ne 0 ]] || [[ -z "$guest_log" ]]; then
    return "$log_rc"
  fi
  # The log travels through the environment because stdin is taken by the
  # heredoc that carries the Python program.
  GUEST_LOG="$guest_log" python3 - "$log_state_path" <<'PY'
import os
import pathlib
import sys

state_path = pathlib.Path(sys.argv[1])
previous = state_path.read_text(encoding="utf-8", errors="replace")
# Normalise Windows line endings so prefix comparison is stable across polls.
current = os.environ["GUEST_LOG"].replace("\r\n", "\n").replace("\r", "\n")

if current.startswith(previous):
    # Usual case: the log only grew, so emit just the new tail.
    sys.stdout.write(current[len(previous):])
else:
    # The log no longer extends the last snapshot; emit it in full.
    sys.stdout.write(current)

state_path.write_text(current, encoding="utf-8")
PY
}
while :; do
set +e
done_status="$(
@@ -558,14 +639,18 @@ EOF
sleep 2
continue
fi
set +e
stream_windows_update_log
log_rc=$?
set -e
if [[ $log_rc -ne 0 ]]; then
warn "windows update helper live log poll failed; retrying"
fi
if [[ -n "$done_status" ]]; then
set +e
guest_powershell_poll 20 "\$log = Join-Path \$env:TEMP '$log_name'; if (Test-Path \$log) { Get-Content \$log }"
log_rc=$?
set -e
if [[ $log_rc -ne 0 ]]; then
if ! stream_windows_update_log; then
warn "windows update helper log drain failed after completion"
fi
rm -f "$log_state_path"
[[ "$done_status" == "0" ]]
return $?
fi
@@ -584,13 +669,10 @@ EOF
fi
fi
if (( SECONDS >= poll_deadline )); then
set +e
guest_powershell_poll 20 "\$log = Join-Path \$env:TEMP '$log_name'; if (Test-Path \$log) { Get-Content \$log }"
log_rc=$?
set -e
if [[ $log_rc -ne 0 ]]; then
if ! stream_windows_update_log; then
warn "windows update helper log drain failed after timeout"
fi
rm -f "$log_state_path"
warn "windows update helper timed out waiting for done file"
return 1
fi