fix(parallels): harden npm update smoke

This commit is contained in:
Peter Steinberger
2026-04-25 00:49:52 +01:00
parent c12af80b5b
commit a1087ea7a6
3 changed files with 317 additions and 48 deletions

View File

@@ -134,6 +134,37 @@ runs the same lanes before release approval.
`openclaw update --tag <candidate>`, and verifies the candidate's
post-update doctor repairs bundled channel runtime dependencies without a
harness-side postinstall repair.
- `pnpm test:parallels:npm-update`
- Runs the native packaged-install update smoke across Parallels guests. Each
selected platform first installs the requested baseline package, then runs
the installed `openclaw update` command in the same guest and verifies the
installed version, update status, gateway readiness, and one local agent
turn.
- Use `--platform macos`, `--platform windows`, or `--platform linux` while
iterating on one guest. Use `--json` for the summary artifact path and
per-lane status.
- Wrap long local runs in a host timeout so Parallels transport stalls cannot
consume the rest of the testing window:
```bash
timeout --foreground 150m pnpm test:parallels:npm-update -- --json
timeout --foreground 90m pnpm test:parallels:npm-update -- --platform windows --json
```
- The script writes nested lane logs under `/tmp/openclaw-parallels-npm-update.*`.
Inspect `windows-update.log`, `macos-update.log`, or `linux-update.log`
before assuming the outer wrapper is hung.
- Windows update can spend 10 to 15 minutes in post-update doctor/runtime
dependency repair on a cold guest; that is still healthy when the nested
npm debug log is advancing.
- Do not run this aggregate wrapper in parallel with individual Parallels
macOS, Windows, or Linux smoke lanes. They share VM state and can collide on
snapshot restore, package serving, or guest gateway state.
- The post-update proof runs the normal bundled plugin surface because
capability facades such as speech, image generation, and media
understanding are loaded through bundled runtime APIs even when the agent
turn itself only checks a simple text response.
- `pnpm openclaw qa aimock`
- Starts only the local AIMock provider server for direct protocol smoke
testing.

View File

@@ -380,16 +380,25 @@ source_tree_dirty_for_build() {
[[ -n "$(git status --porcelain -- src ui packages extensions package.json pnpm-lock.yaml 'tsconfig*.json' 2>/dev/null)" ]]
}
current_build_has_control_ui() {
  # Succeeds only when a built Control UI is present under dist/ (relative to
  # the current directory): the entry page must be a regular file and the
  # assets directory must contain at least one entry.
  if [[ ! -f dist/control-ui/index.html ]]; then
    return 1
  fi
  # compgen -G exits non-zero when the glob matches nothing; its status is the
  # function's status.
  compgen -G "dist/control-ui/assets/*" >/dev/null
}
ensure_current_build() {
local build_commit head rc
head="$(git rev-parse HEAD)"
build_commit="$(current_build_commit)"
if [[ "$build_commit" == "$head" ]] && ! source_tree_dirty_for_build; then
if [[ "$build_commit" == "$head" ]] && ! source_tree_dirty_for_build && current_build_has_control_ui; then
return 0
fi
say "Build dist for current head"
pnpm build
rc=$?
if [[ $rc -eq 0 ]]; then
pnpm ui:build
rc=$?
fi
if [[ $rc -eq 0 ]]; then
parallels_package_assert_no_generated_drift
rc=$?
@@ -525,6 +534,30 @@ function Invoke-CaptureLogged {
return ($output | Out-String).Trim()
}
function Test-GatewayListenerReady {
    # Reports whether any TCP socket is in the Listen state on local port 18789.
    # Lookup errors are suppressed, so a failed query reads as "not listening".
    $listening = Get-NetTCPConnection -LocalPort 18789 -State Listen -ErrorAction SilentlyContinue
    if ($listening) {
        return $true
    }
    return $false
}
function Test-GatewayLogReady {
    # Reports whether the most recently written openclaw-*.log under
    # %LOCALAPPDATA%\Temp\openclaw contains the text "ready ( (with the leading
    # double quote) within its last 120 lines. A missing directory, a missing
    # log file, or an unreadable log all count as "not ready".
    $gatewayLogDir = Join-Path $env:LOCALAPPDATA 'Temp\openclaw'
    if (-not (Test-Path $gatewayLogDir)) {
        return $false
    }

    # Newest log first; only that file is inspected.
    $newestLog = Get-ChildItem -Path $gatewayLogDir -Filter 'openclaw-*.log' -File -ErrorAction SilentlyContinue |
        Sort-Object LastWriteTime -Descending |
        Select-Object -First 1
    if (-not $newestLog) {
        return $false
    }

    try {
        $recentLines = Get-Content -Path $newestLog.FullName -Tail 120 -ErrorAction Stop | Out-String
    } catch {
        return $false
    }

    if ($recentLines -match '"ready \(') {
        return $true
    }
    return $false
}
function Wait-GatewayRpcReady {
param(
[Parameter(Mandatory = $true)][string]$OpenClawPath,
@@ -534,11 +567,17 @@ function Wait-GatewayRpcReady {
for ($attempt = 1; $attempt -le $Attempts; $attempt++) {
Write-ProgressLog "update.gateway-status.attempt-$attempt"
if ((Test-GatewayListenerReady) -and (Test-GatewayLogReady)) {
Write-ProgressLog "update.gateway-status.ready-log-$attempt"
return $true
}
try {
$statusOutput = Invoke-CaptureLogged 'openclaw gateway status' { & $OpenClawPath gateway status --deep --require-rpc }
if ($statusOutput -match 'Read probe:\s*failed') {
throw 'gateway status returned without RPC read readiness'
$probeOutput = Invoke-CaptureLogged 'openclaw gateway probe' { & $OpenClawPath gateway probe --url ws://127.0.0.1:18789 --timeout 5000 --json }
$probe = $probeOutput | ConvertFrom-Json
if (-not $probe.ok) {
throw 'gateway probe returned without RPC readiness'
}
Invoke-CaptureLogged 'openclaw gateway status' { & $OpenClawPath gateway status --deep --require-rpc } | Out-Null
return $true
} catch {
if ($attempt -ge $Attempts) {
@@ -674,12 +713,11 @@ function Invoke-OpenClawUpdateWithTimeout {
param(
[Parameter(Mandatory = $true)][string]$OpenClawPath,
[Parameter(Mandatory = $true)][string]$UpdateTarget,
[int]$TimeoutSeconds = 600
[int]$TimeoutSeconds = 1200
)
$updateJob = Start-Job -ScriptBlock {
param([string]$Path, [string]$Target)
$env:OPENCLAW_DISABLE_BUNDLED_PLUGINS = '1'
$output = & $Path update --tag $Target --yes --json *>&1
[pscustomobject]@{
ExitCode = $LASTEXITCODE
@@ -707,6 +745,65 @@ function Invoke-OpenClawUpdateWithTimeout {
Stop-OpenClawUpdateProcesses
}
function Invoke-OpenClawAgentWithTimeout {
    # Runs one local "openclaw agent" turn in a background job and polls the
    # job's redirected stdout/stderr files until the JSON output reports an
    # assistant reply of exactly "OK".
    #
    # Parameters:
    #   OpenClawPath   - path of the openclaw executable to invoke.
    #   SessionId      - value passed to --session-id.
    #   TimeoutSeconds - polling budget in seconds (default 600).
    #
    # Returns 0 as soon as the OK reply is seen (the job is then stopped).
    # Throws when the job ends without an OK reply or when the deadline passes.
    # Captured output is appended to $LogPath, which is not defined in this
    # function (presumably a script-scope variable - confirm against the caller).
    param(
        [Parameter(Mandatory = $true)][string]$OpenClawPath,
        [Parameter(Mandatory = $true)][string]$SessionId,
        [int]$TimeoutSeconds = 600
    )
    $message = 'Reply with exact ASCII text OK only.'
    $stdout = Join-Path $env:TEMP ("openclaw-parallels-agent-{0}.out.log" -f ([guid]::NewGuid().ToString('N')))
    $stderr = Join-Path $env:TEMP ("openclaw-parallels-agent-{0}.err.log" -f ([guid]::NewGuid().ToString('N')))
    $agentJob = Start-Job -ScriptBlock {
        param([string]$Path, [string]$AgentSessionId, [string]$AgentMessage, [string]$StdoutPath, [string]$StderrPath)
        & $Path agent --local --agent main --session-id $AgentSessionId --message $AgentMessage --json > $StdoutPath 2> $StderrPath
        exit $LASTEXITCODE
    } -ArgumentList $OpenClawPath, $SessionId, $message, $stdout, $stderr
    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    $combined = ''
    while ((Get-Date) -lt $deadline) {
        Start-Sleep -Seconds 2
        # Snapshot the job state BEFORE reading the output files. Sampling it
        # afterwards leaves a window in which the agent writes its final JSON and
        # exits between the read and the state check, which would be reported as
        # "finished without OK response" although the OK reply is on disk.
        $jobState = $agentJob.State
        $out = ''
        $err = ''
        if (Test-Path $stdout) {
            $out = Get-Content -Path $stdout -Raw -ErrorAction SilentlyContinue
        }
        if (Test-Path $stderr) {
            $err = Get-Content -Path $stderr -Raw -ErrorAction SilentlyContinue
        }
        $combined = "$out`n$err"
        if ($combined -match '"finalAssistantRawText":\s*"OK"' -or $combined -match '"finalAssistantVisibleText":\s*"OK"') {
            if ($combined.Trim().Length -gt 0) {
                $combined.Trim() | Tee-Object -FilePath $LogPath -Append | Out-Null
            }
            # The reply is already captured; do not wait for the process to exit.
            Stop-Job $agentJob -ErrorAction SilentlyContinue
            Remove-Job $agentJob -Force -ErrorAction SilentlyContinue
            return 0
        }
        if ($jobState -in @('Completed', 'Failed', 'Stopped')) {
            # The job had already ended before this iteration's read, so the
            # output above is final and contains no OK reply.
            if ($combined.Trim().Length -gt 0) {
                $combined.Trim() | Tee-Object -FilePath $LogPath -Append | Out-Null
            }
            Receive-Job $agentJob -ErrorAction SilentlyContinue | Out-Null
            Remove-Job $agentJob -Force -ErrorAction SilentlyContinue
            if ($jobState -ne 'Completed') {
                throw "openclaw agent failed with job state $jobState"
            }
            throw 'openclaw agent finished without OK response'
        }
    }
    # Deadline reached with the job still running: stop it, keep whatever output
    # was captured last, and fail.
    Stop-Job $agentJob -ErrorAction SilentlyContinue
    Remove-Job $agentJob -Force -ErrorAction SilentlyContinue
    Write-ProgressLog 'update.agent-turn.timeout'
    if ($combined.Trim().Length -gt 0) {
        $combined.Trim() | Tee-Object -FilePath $LogPath -Append | Out-Null
    }
    throw "openclaw agent timed out after ${TimeoutSeconds}s"
}
function Start-GatewayRunFallback {
param(
[Parameter(Mandatory = $true)][string]$OpenClawPath
@@ -836,13 +933,10 @@ try {
# an explicit start only if the RPC endpoint never returns.
Write-ProgressLog 'update.restart-gateway'
Restart-GatewayWithRecovery -OpenClawPath $openclaw
Stop-OpenClawGatewayProcesses
Complete-WorkspaceSetup
Write-ProgressLog 'update.agent-turn'
Invoke-CaptureLogged 'openclaw agent' { & $openclaw agent --local --agent main --session-id $SessionId --message 'Reply with exact ASCII text OK only.' --json } | Out-Null
$exitCode = $LASTEXITCODE
if ($null -eq $exitCode) {
$exitCode = 0
}
$exitCode = Invoke-OpenClawAgentWithTimeout -OpenClawPath $openclaw -SessionId $SessionId
Write-ProgressLog 'update.done'
Set-Content -Path $DonePath -Value ([string]$exitCode)
exit $exitCode
@@ -921,7 +1015,19 @@ verify_macos_update_after_transport_loss() {
cat <<EOF | prlctl exec "$MACOS_VM" /usr/bin/tee "$script_path" >/dev/null
set -euo pipefail
export PATH=/opt/homebrew/bin:/opt/homebrew/opt/node/bin:/opt/homebrew/sbin:/usr/bin:/bin:/usr/sbin:/sbin
export OPENCLAW_PLUGIN_STAGE_DIR="\$HOME/.openclaw/plugin-runtime-deps-parallels"
busy="\$(/bin/ps -axo command | /usr/bin/egrep 'openclaw update|npm install|pnpm install|pnpm run build' | /usr/bin/egrep -v 'egrep|openclaw-npm-update-macos-recover' || true)"
# Succeeds when lsof finds a process listening on TCP port 18789; lsof output
# and errors are discarded, so only the exit status is used.
gateway_listener_ready() {
/usr/sbin/lsof -tiTCP:18789 -sTCP:LISTEN >/dev/null 2>&1
}
gateway_log_ready() {
  # Succeeds when the newest /tmp/openclaw/openclaw-*.log contains the text
  # "ready (" within its last 160 lines. Fails when no such log exists.
  latest="\$(/bin/ls -t /tmp/openclaw/openclaw-*.log 2>/dev/null | /usr/bin/head -n 1 || true)"
  [ -n "\$latest" ] || return 1
  # Deliberately not grep -q: this script runs with pipefail, and grep -q exits
  # on the first match, which can kill tail with SIGPIPE and make the whole
  # pipeline report failure even though the marker was found. Letting grep read
  # all of its input keeps the pipeline status equal to the match result.
  /usr/bin/tail -n 160 "\$latest" | /usr/bin/grep 'ready (' >/dev/null
}
gateway_smoke_ready() {
  # Ready means both: a listener on the gateway port and the ready marker in
  # the newest gateway log. A failing listener check short-circuits and its
  # exit status is passed through unchanged.
  gateway_listener_ready || return
  gateway_log_ready
}
if [ -n "\$busy" ]; then
printf 'update still has active npm/pnpm/openclaw processes\n%s\n' "\$busy" >&2
exit 1
@@ -937,10 +1043,10 @@ if [ -n "$expected_needle" ]; then
;;
esac
fi
/opt/homebrew/bin/openclaw gateway status --deep --require-rpc >/dev/null 2>&1 || /opt/homebrew/bin/openclaw gateway restart || true
gateway_smoke_ready || /opt/homebrew/bin/openclaw gateway restart || true
gateway_ready=0
for _ in 1 2 3 4 5 6; do
if /opt/homebrew/bin/openclaw gateway status --deep --require-rpc; then
if gateway_smoke_ready; then
gateway_ready=1
break
fi
@@ -949,7 +1055,7 @@ done
if [ "\$gateway_ready" != "1" ]; then
/opt/homebrew/bin/openclaw gateway start || true
for _ in 1 2 3 4 5 6; do
if /opt/homebrew/bin/openclaw gateway status --deep --require-rpc; then
if gateway_smoke_ready; then
gateway_ready=1
break
fi
@@ -957,7 +1063,7 @@ if [ "\$gateway_ready" != "1" ]; then
done
fi
if [ "\$gateway_ready" != "1" ]; then
echo "gateway did not become RPC-ready after transport recovery" >&2
echo "gateway did not become ready after transport recovery" >&2
exit 1
fi
workspace="\${OPENCLAW_WORKSPACE_DIR:-\$HOME/.openclaw/workspace}"
@@ -993,7 +1099,7 @@ print(base64.b64encode(os.environ["PROVIDER_KEY"].encode("utf-8")).decode("ascii
PY
)"
set +e
guest_powershell_poll 120 "$(cat <<EOF
guest_powershell_poll 720 "$(cat <<EOF
\$ErrorActionPreference = 'Stop'
\$openclaw = Join-Path \$env:APPDATA 'npm\\openclaw.cmd'
if (-not (Test-Path \$openclaw)) {
@@ -1126,7 +1232,59 @@ New-Item -ItemType Directory -Path \$stateDir -Force | Out-Null
}
'@ | Set-Content -Path (Join-Path \$stateDir 'workspace-state.json') -Encoding UTF8
Remove-Item (Join-Path \$workspace 'BOOTSTRAP.md') -Force -ErrorAction SilentlyContinue
& \$openclaw agent --local --agent main --session-id 'parallels-npm-update-windows-transport-recovery-$expected_needle' --message 'Reply with exact ASCII text OK only.' --json
Stop-GatewayListeners
\$agentStdout = Join-Path \$env:TEMP ("openclaw-parallels-agent-{0}.out.log" -f ([guid]::NewGuid().ToString('N')))
\$agentStderr = Join-Path \$env:TEMP ("openclaw-parallels-agent-{0}.err.log" -f ([guid]::NewGuid().ToString('N')))
\$agentJob = Start-Job -ScriptBlock {
param([string]\$Path, [string]\$StdoutPath, [string]\$StderrPath)
& \$Path agent --local --agent main --session-id 'parallels-npm-update-windows-transport-recovery-$expected_needle' --message 'Reply with exact ASCII text OK only.' --json > \$StdoutPath 2> \$StderrPath
exit \$LASTEXITCODE
} -ArgumentList \$openclaw, \$agentStdout, \$agentStderr
\$agentDeadline = (Get-Date).AddSeconds(600)
\$agentCombined = ''
while ((Get-Date) -lt \$agentDeadline) {
Start-Sleep -Seconds 2
\$agentOut = ''
\$agentErr = ''
if (Test-Path \$agentStdout) {
\$agentOut = Get-Content -Path \$agentStdout -Raw -ErrorAction SilentlyContinue
}
if (Test-Path \$agentStderr) {
\$agentErr = Get-Content -Path \$agentStderr -Raw -ErrorAction SilentlyContinue
}
\$agentCombined = \$agentOut + [Environment]::NewLine + \$agentErr
if (\$agentCombined -match '"finalAssistantRawText":\s*"OK"' -or \$agentCombined -match '"finalAssistantVisibleText":\s*"OK"') {
if (\$agentCombined.Trim().Length -gt 0) {
\$agentCombined.Trim() | Write-Output
}
Stop-Job \$agentJob -ErrorAction SilentlyContinue
Remove-Job \$agentJob -Force -ErrorAction SilentlyContinue
\$agentJob = \$null
break
}
if (\$agentJob.State -in @('Completed', 'Failed', 'Stopped')) {
if (\$agentCombined.Trim().Length -gt 0) {
\$agentCombined.Trim() | Write-Output
}
Receive-Job \$agentJob -ErrorAction SilentlyContinue | Out-Null
\$agentJobState = \$agentJob.State
Remove-Job \$agentJob -Force -ErrorAction SilentlyContinue
\$agentJob = \$null
if (\$agentJobState -ne 'Completed') {
throw "openclaw agent failed with job state \$agentJobState"
}
throw 'openclaw agent finished without OK response'
break
}
}
if (\$null -ne \$agentJob) {
Stop-Job \$agentJob -ErrorAction SilentlyContinue
Remove-Job \$agentJob -Force -ErrorAction SilentlyContinue
if (\$agentCombined.Trim().Length -gt 0) {
\$agentCombined.Trim() | Write-Output
}
throw 'openclaw agent timed out after 600s'
}
EOF
)"
local rc=$?
@@ -1146,14 +1304,24 @@ start_timeout_guard() {
if [[ -n "$log_path" ]]; then
dump_log_tail "$label" "$log_path"
fi
kill "$pid" >/dev/null 2>&1 || true
terminate_process_tree "$pid" TERM
sleep 2
kill -9 "$pid" >/dev/null 2>&1 || true
terminate_process_tree "$pid" KILL
fi
) >&2 &
printf '%s\n' "$!"
}
#######################################
# Signal a process and all of its descendants, deepest descendants first.
# Arguments:
#   $1 - PID of the tree root.
#   $2 - signal name without the SIG prefix or leading dash (default: TERM).
# Returns:
#   Always 0; processes that already exited are ignored (best effort).
#######################################
terminate_process_tree() {
  local pid="$1"
  local signal_name="${2:-TERM}"
  local child

  # Only act on a plain PID greater than 1. An empty or non-numeric value is a
  # caller bug, and "kill -SIG 0" would signal the caller's own process group.
  if ! [[ "$pid" =~ ^[1-9][0-9]*$ ]] || [[ "$pid" == "1" ]]; then
    return 0
  fi

  # pgrep exits 1 when the process has no children. Reading it through process
  # substitution with "|| true" keeps that from aborting the function before
  # the kill below when the caller runs with errexit and pipefail enabled.
  while IFS= read -r child; do
    if [[ -n "$child" ]]; then
      terminate_process_tree "$child" "$signal_name"
    fi
  done < <(pgrep -P "$pid" 2>/dev/null || true)

  kill "-$signal_name" "$pid" >/dev/null 2>&1 || true
}
stop_timeout_guard() {
local pid="${1:-}"
[[ -n "$pid" ]] || return 0
@@ -1210,27 +1378,50 @@ host_timeout_exec() {
shift
HOST_TIMEOUT_S="$timeout_s" "$PYTHON_BIN" - "$@" <<'PY'
import os
import signal
import subprocess
import sys
timeout = int(os.environ["HOST_TIMEOUT_S"])
args = sys.argv[1:]
process = subprocess.Popen(
args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
start_new_session=True,
)
try:
completed = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout)
except subprocess.TimeoutExpired as exc:
if exc.stdout:
sys.stdout.buffer.write(exc.stdout)
if exc.stderr:
sys.stderr.buffer.write(exc.stderr)
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
try:
os.killpg(process.pid, signal.SIGTERM)
except ProcessLookupError:
pass
except PermissionError:
pass
try:
stdout, stderr = process.communicate(timeout=2)
except subprocess.TimeoutExpired:
try:
os.killpg(process.pid, signal.SIGKILL)
except ProcessLookupError:
pass
except PermissionError:
pass
stdout, stderr = process.communicate()
if stdout:
sys.stdout.buffer.write(stdout)
if stderr:
sys.stderr.buffer.write(stderr)
sys.stderr.write(f"host timeout after {timeout}s\n")
raise SystemExit(124)
if completed.stdout:
sys.stdout.buffer.write(completed.stdout)
if completed.stderr:
sys.stderr.buffer.write(completed.stderr)
raise SystemExit(completed.returncode)
if stdout:
sys.stdout.buffer.write(stdout)
if stderr:
sys.stderr.buffer.write(stderr)
raise SystemExit(process.returncode)
PY
}
@@ -1401,11 +1592,23 @@ run_macos_update() {
set -euo pipefail
export PATH=/opt/homebrew/bin:/opt/homebrew/opt/node/bin:/opt/homebrew/sbin:/usr/bin:/bin:/usr/sbin:/sbin
if [ -z "\${HOME:-}" ]; then export HOME="/Users/\$(id -un)"; fi
export OPENCLAW_PLUGIN_STAGE_DIR="\$HOME/.openclaw/plugin-runtime-deps-parallels"
if [ -z "\${$API_KEY_ENV:-}" ]; then
echo "$API_KEY_ENV is required in the macOS update environment" >&2
exit 1
fi
cd "\$HOME"
# Succeeds when lsof finds a process listening on TCP port 18789; lsof output
# and errors are discarded, so only the exit status is used.
gateway_listener_ready() {
/usr/sbin/lsof -tiTCP:18789 -sTCP:LISTEN >/dev/null 2>&1
}
gateway_log_ready() {
  # Succeeds when the newest /tmp/openclaw/openclaw-*.log contains the text
  # "ready (" within its last 160 lines. Fails when no such log exists.
  latest="\$(/bin/ls -t /tmp/openclaw/openclaw-*.log 2>/dev/null | /usr/bin/head -n 1 || true)"
  [ -n "\$latest" ] || return 1
  # Deliberately not grep -q: this script runs with pipefail, and grep -q exits
  # on the first match, which can kill tail with SIGPIPE and make the whole
  # pipeline report failure even though the marker was found. Letting grep read
  # all of its input keeps the pipeline status equal to the match result.
  /usr/bin/tail -n 160 "\$latest" | /usr/bin/grep 'ready (' >/dev/null
}
gateway_smoke_ready() {
  # Ready means both: a listener on the gateway port and the ready marker in
  # the newest gateway log. A failing listener check short-circuits and its
  # exit status is passed through unchanged.
  gateway_listener_ready || return
  gateway_log_ready
}
scrub_future_plugin_entries() {
node - <<'JS' || true
const fs = require("fs");
@@ -1446,7 +1649,7 @@ stop_openclaw_gateway_processes() {
# host can observe new plugin metadata mid-update and abort config validation.
scrub_future_plugin_entries
stop_openclaw_gateway_processes
OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 /opt/homebrew/bin/openclaw update --tag "$update_target" --yes --json
/opt/homebrew/bin/openclaw update --tag "$update_target" --yes --json
# Same-guest npm upgrades can leave the old gateway process holding the old
# bundled plugin host version. Stop it before post-update config commands.
stop_openclaw_gateway_processes
@@ -1470,7 +1673,7 @@ fi
/opt/homebrew/bin/openclaw gateway restart || true
gateway_ready=0
for _ in 1 2 3 4 5 6 7 8; do
if /opt/homebrew/bin/openclaw gateway status --deep --require-rpc >/dev/null 2>&1; then
if gateway_smoke_ready; then
gateway_ready=1
break
fi
@@ -1480,7 +1683,7 @@ if [ "\$gateway_ready" != "1" ]; then
stop_openclaw_gateway_processes
/opt/homebrew/bin/openclaw gateway run --bind loopback --port 18789 --force >/tmp/openclaw-parallels-npm-update-macos-gateway.log 2>&1 </dev/null &
for _ in 1 2 3 4 5 6 7 8; do
if /opt/homebrew/bin/openclaw gateway status --deep --require-rpc >/dev/null 2>&1; then
if gateway_smoke_ready; then
gateway_ready=1
break
fi
@@ -1490,7 +1693,9 @@ fi
if [ "\$gateway_ready" != "1" ]; then
tail -n 120 /tmp/openclaw-parallels-npm-update-macos-gateway.log 2>/dev/null || true
fi
/opt/homebrew/bin/openclaw gateway status --deep --require-rpc
if [ "\$gateway_ready" != "1" ]; then
/opt/homebrew/bin/openclaw gateway status --deep --require-rpc
fi
workspace="\${OPENCLAW_WORKSPACE_DIR:-\$HOME/.openclaw/workspace}"
mkdir -p "\$workspace/.openclaw"
cat > "\$workspace/IDENTITY.md" <<'IDENTITY_EOF'
@@ -1577,7 +1782,7 @@ stop_openclaw_gateway_processes() {
# the old host can observe new plugin metadata mid-update and abort validation.
scrub_future_plugin_entries
stop_openclaw_gateway_processes
OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 openclaw update --tag "$update_target" --yes --json
openclaw update --tag "$update_target" --yes --json
# The fresh Linux lane starts a manual gateway; stop the old process before
# post-update config validation sees mixed old-host/new-plugin metadata.
stop_openclaw_gateway_processes

View File

@@ -413,27 +413,50 @@ host_timeout_exec() {
shift
HOST_TIMEOUT_S="$timeout_s" python3 - "$@" <<'PY'
import os
import signal
import subprocess
import sys
timeout = int(os.environ["HOST_TIMEOUT_S"])
args = sys.argv[1:]
process = subprocess.Popen(
args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
start_new_session=True,
)
try:
completed = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout)
except subprocess.TimeoutExpired as exc:
if exc.stdout:
sys.stdout.buffer.write(exc.stdout)
if exc.stderr:
sys.stderr.buffer.write(exc.stderr)
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
try:
os.killpg(process.pid, signal.SIGTERM)
except ProcessLookupError:
pass
except PermissionError:
pass
try:
stdout, stderr = process.communicate(timeout=2)
except subprocess.TimeoutExpired:
try:
os.killpg(process.pid, signal.SIGKILL)
except ProcessLookupError:
pass
except PermissionError:
pass
stdout, stderr = process.communicate()
if stdout:
sys.stdout.buffer.write(stdout)
if stderr:
sys.stderr.buffer.write(stderr)
sys.stderr.write(f"host timeout after {timeout}s\n")
raise SystemExit(124)
if completed.stdout:
sys.stdout.buffer.write(completed.stdout)
if completed.stderr:
sys.stderr.buffer.write(completed.stderr)
raise SystemExit(completed.returncode)
if stdout:
sys.stdout.buffer.write(stdout)
if stderr:
sys.stderr.buffer.write(stderr)
raise SystemExit(process.returncode)
PY
}
@@ -690,6 +713,16 @@ show_log_excerpt() {
tail -n 80 "$log_path" >&2 || true
}
#######################################
# Signal a process and all of its descendants, deepest descendants first.
# Arguments:
#   $1 - PID of the tree root.
#   $2 - signal name without the SIG prefix or leading dash (default: TERM).
# Returns:
#   Always 0; processes that already exited are ignored (best effort).
#######################################
terminate_process_tree() {
  local pid="$1"
  local signal_name="${2:-TERM}"
  local child

  # Only act on a plain PID greater than 1. An empty or non-numeric value is a
  # caller bug, and "kill -SIG 0" would signal the caller's own process group.
  if ! [[ "$pid" =~ ^[1-9][0-9]*$ ]] || [[ "$pid" == "1" ]]; then
    return 0
  fi

  # pgrep exits 1 when the process has no children. Reading it through process
  # substitution with "|| true" keeps that from aborting the function before
  # the kill below when the caller runs with errexit and pipefail enabled.
  while IFS= read -r child; do
    if [[ -n "$child" ]]; then
      terminate_process_tree "$child" "$signal_name"
    fi
  done < <(pgrep -P "$pid" 2>/dev/null || true)

  kill "-$signal_name" "$pid" >/dev/null 2>&1 || true
}
phase_run() {
local phase_id="$1"
local timeout_s="$2"
@@ -716,9 +749,9 @@ phase_run() {
fi
if (( SECONDS - start >= timeout_s )); then
timed_out=1
kill "$pid" >/dev/null 2>&1 || true
terminate_process_tree "$pid" TERM
sleep 2
kill -9 "$pid" >/dev/null 2>&1 || true
terminate_process_tree "$pid" KILL
break
fi
sleep 1