diff --git a/CHANGELOG.md b/CHANGELOG.md index 7062886605e..babcf824d06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- CLI/update: guard Windows scheduled-task stops by state and timeout so auto-update restart cannot hang indefinitely on `schtasks /End` before stale-listener cleanup. Fixes #69970. Thanks @yangswld and @sherlock-huang. - Gateway/install: refresh loaded gateway service installs when the current service embeds stale gateway auth instead of returning already-installed, avoiding LaunchAgent token-mismatch loops after token rotation. Fixes #70752. Thanks @hyspacex. - Update: ignore bundled plugin `.openclaw-install-stage` directories during global install verification and packaged dist pruning so leftover runtime-dep staging files do not turn successful updates into `unexpected packaged dist file` failures. Fixes #71752. Thanks @waynegault. - Node runtime: keep node-host retry timers alive across Gateway restarts and exit on terminal credential pauses so supervised nodes do not become silent zombies. Fixes #69800. Thanks @meroli28. 
diff --git a/src/cli/update-cli/restart-helper.test.ts b/src/cli/update-cli/restart-helper.test.ts
index e85e2c8d6ce..722c9c630a5 100644
--- a/src/cli/update-cli/restart-helper.test.ts
+++ b/src/cli/update-cli/restart-helper.test.ts
@@ -69,36 +69,45 @@ exit 0
   }
 
   function expectWindowsRestartWaitOrdering(content: string, port = 18789) {
-    const endCommand = 'schtasks /End /TN "';
-    const pollAttemptsInit = "set /a attempts=0";
-    const pollLabel = ":wait_for_port_release";
-    const pollAttemptIncrement = "set /a attempts+=1";
-    const pollNetstatCheck = `netstat -ano | findstr /R /C:":${port} .*LISTENING" >nul`;
-    const forceKillLabel = ":force_kill_listener";
-    const forceKillCommand = "taskkill /F /PID %%P >>";
-    const portReleasedLabel = ":port_released";
-    const runCommand = 'schtasks /Run /TN "';
-    const endIndex = content.indexOf(endCommand);
-    const attemptsInitIndex = content.indexOf(pollAttemptsInit, endIndex);
-    const pollLabelIndex = content.indexOf(pollLabel, attemptsInitIndex);
-    const pollAttemptIncrementIndex = content.indexOf(pollAttemptIncrement, pollLabelIndex);
-    const pollNetstatCheckIndex = content.indexOf(pollNetstatCheck, pollAttemptIncrementIndex);
-    const forceKillLabelIndex = content.indexOf(forceKillLabel, pollNetstatCheckIndex);
-    const forceKillCommandIndex = content.indexOf(forceKillCommand, forceKillLabelIndex);
-    const portReleasedLabelIndex = content.indexOf(portReleasedLabel, forceKillCommandIndex);
-    const runIndex = content.indexOf(runCommand, portReleasedLabelIndex);
+    const stateCheck = "$taskState = Get-OpenClawScheduledTaskState -TaskName $taskName";
+    const runningGuard = 'if ($taskState -eq "Running")';
+    const endCommand =
+      'Invoke-OpenClawSchtasksWithTimeout -Arguments @("/End", "/TN", $taskName) -TimeoutSeconds 10';
+    const skipEndLog = "openclaw restart skipped schtasks end";
+    const pollLoop = "for ($attempt = 1; $attempt -le 10; $attempt++)";
+    const pollCall = `Get-OpenClawListenerPids -Port $port`;
+    const forceKillBranch = "if ($attempt -eq 10)";
+    const forceKillCommand = "Stop-Process -Id $listenerPid -Force";
+    const runCommand =
+      'Invoke-OpenClawSchtasksWithTimeout -Arguments @("/Run", "/TN", $taskName) -TimeoutSeconds 30';
+    const portAssignment = `$port = ${port}`;
+    const stateCheckIndex = content.indexOf(stateCheck);
+    const runningGuardIndex = content.indexOf(runningGuard, stateCheckIndex);
+    const endIndex = content.indexOf(endCommand, runningGuardIndex);
+    const skipEndLogIndex = content.indexOf(skipEndLog, endIndex);
+    const portAssignmentIndex = content.indexOf(portAssignment);
+    const pollLoopIndex = content.indexOf(pollLoop, skipEndLogIndex);
+    const pollCallIndex = content.indexOf(pollCall, pollLoopIndex);
+    const forceKillBranchIndex = content.indexOf(forceKillBranch, pollCallIndex);
+    const forceKillCommandIndex = content.indexOf(forceKillCommand, forceKillBranchIndex);
+    const runIndex = content.indexOf(runCommand, forceKillCommandIndex);
 
-    expect(endIndex).toBeGreaterThanOrEqual(0);
-    expect(attemptsInitIndex).toBeGreaterThan(endIndex);
-    expect(pollLabelIndex).toBeGreaterThan(attemptsInitIndex);
-    expect(pollAttemptIncrementIndex).toBeGreaterThan(pollLabelIndex);
-    expect(pollNetstatCheckIndex).toBeGreaterThan(pollAttemptIncrementIndex);
-    expect(forceKillLabelIndex).toBeGreaterThan(pollNetstatCheckIndex);
-    expect(forceKillCommandIndex).toBeGreaterThan(forceKillLabelIndex);
-    expect(portReleasedLabelIndex).toBeGreaterThan(forceKillCommandIndex);
-    expect(runIndex).toBeGreaterThan(portReleasedLabelIndex);
+    expect(stateCheckIndex).toBeGreaterThanOrEqual(0);
+    expect(runningGuardIndex).toBeGreaterThan(stateCheckIndex);
+    expect(endIndex).toBeGreaterThan(runningGuardIndex);
+    expect(skipEndLogIndex).toBeGreaterThan(endIndex);
+    expect(portAssignmentIndex).toBeGreaterThanOrEqual(0);
+    // $port must be assigned before the poll loop that reads it, not merely present.
+    expect(pollLoopIndex).toBeGreaterThan(Math.max(skipEndLogIndex, portAssignmentIndex));
+    expect(pollCallIndex).toBeGreaterThan(pollLoopIndex);
+    expect(forceKillBranchIndex).toBeGreaterThan(pollCallIndex);
+    expect(forceKillCommandIndex).toBeGreaterThan(forceKillBranchIndex);
+    expect(runIndex).toBeGreaterThan(forceKillCommandIndex);
 
     expect(content).not.toContain("timeout /t 3 /nobreak >nul");
+    expect(content).not.toContain("findstr");
+    expect(content).not.toContain("netstat -ano |");
+    expect(content).not.toContain("schtasks /End /TN");
   }
 
   beforeEach(() => {
@@ -296,21 +305,25 @@ exit 0
     await cleanupScript(scriptPath);
   });
 
-  it("creates a schtasks restart script on Windows", async () => {
+  it("creates a guarded schtasks restart script on Windows", async () => {
     Object.defineProperty(process, "platform", { value: "win32" });
 
     const { scriptPath, content } = await prepareAndReadScript({
       OPENCLAW_PROFILE: "default",
     });
-    expect(scriptPath.endsWith(".bat")).toBe(true);
+    expect(scriptPath.endsWith(".cmd")).toBe(true);
     expect(content).toContain("@echo off");
+    expect(content).toContain("powershell -NoProfile -ExecutionPolicy Bypass -Command");
+    expect(content).not.toContain("-File");
+    expect(content).toContain('$ErrorActionPreference = "Continue"');
     expect(content).toContain("gateway-restart.log");
-    expect(content).toContain("openclaw restart attempt source=update target=OpenClaw Gateway");
-    expect(content).toContain('schtasks /End /TN "OpenClaw Gateway"');
-    expect(content).toContain('schtasks /Run /TN "OpenClaw Gateway" >>');
+    expect(content).toContain("$taskName = 'OpenClaw Gateway'");
+    expect(content).toContain("function Invoke-OpenClawSchtasksWithTimeout");
+    expect(content).toContain("function Get-OpenClawScheduledTaskState");
+    expect(content).toContain("Get-ScheduledTask -TaskName $TaskName");
+    expect(content).toContain("openclaw restart skipped schtasks end");
     expectWindowsRestartWaitOrdering(content);
-    // Batch self-cleanup
-    expect(content).toContain('del "%~f0"');
+    expect(content).toContain('del "%~f0" >nul 2>&1');
     await cleanupScript(scriptPath);
   });
 
@@ -321,8 +334,11 @@ exit 0
       OPENCLAW_PROFILE: "default",
       OPENCLAW_WINDOWS_TASK_NAME: "OpenClaw Gateway (custom)",
     });
-    expect(content).toContain('schtasks /End /TN "OpenClaw Gateway (custom)"');
-    expect(content).toContain('schtasks /Run /TN "OpenClaw Gateway (custom)"');
+    expect(content).toContain("$taskName = 'OpenClaw Gateway (custom)'");
+    expect(content).toContain("Get-OpenClawScheduledTaskState -TaskName $taskName");
+    expect(content).toContain(
+      'Invoke-OpenClawSchtasksWithTimeout -Arguments @("/End", "/TN", $taskName) -TimeoutSeconds 10',
+    );
     expectWindowsRestartWaitOrdering(content);
     await cleanupScript(scriptPath);
   });
@@ -337,10 +353,10 @@ exit 0
       },
       customPort,
     );
-    expect(content).toContain(`netstat -ano | findstr /R /C:":${customPort} .*LISTENING" >nul`);
-    expect(content).toContain(
-      `for /f "tokens=5" %%P in ('netstat -ano ^| findstr /R /C:":${customPort} .*LISTENING"') do (`,
-    );
+    expect(content).toContain(`$port = ${customPort}`);
+    expect(content).toContain("Get-NetTCPConnection -LocalPort $Port -State Listen");
+    expect(content).toContain("& netstat.exe -ano -p tcp");
+    expect(content).not.toContain("findstr");
     expectWindowsRestartWaitOrdering(content, customPort);
     await cleanupScript(scriptPath);
   });
@@ -371,7 +387,7 @@ exit 0
     const { scriptPath, content } = await prepareAndReadScript({
       OPENCLAW_PROFILE: "production",
     });
-    expect(content).toContain('schtasks /End /TN "OpenClaw Gateway (production)"');
+    expect(content).toContain("$taskName = 'OpenClaw Gateway (production)'");
     expectWindowsRestartWaitOrdering(content);
     await cleanupScript(scriptPath);
   });
diff --git a/src/cli/update-cli/restart-helper.ts b/src/cli/update-cli/restart-helper.ts
index a2191f7d4ae..b796c77fb2c 100644
--- a/src/cli/update-cli/restart-helper.ts
+++ b/src/cli/update-cli/restart-helper.ts
@@ -10,8 +10,8 @@ import {
   resolveGatewayWindowsTaskName,
 } from "../../daemon/constants.js";
 import {
-  renderCmdRestartLogSetup,
   renderPosixRestartLogSetup,
+  resolveGatewayRestartLogPath,
shellEscapeRestartLogValue, } from "../../daemon/restart-logs.js"; import { normalizeOptionalString } from "../../shared/string-coerce.js"; @@ -25,12 +25,15 @@ function shellEscape(value: string): string { return value.replace(/'/g, "'\\''"); } -/** Validates a string is safe for embedding in a batch (cmd.exe) script. */ -function isBatchSafe(value: string): boolean { - // Reject characters that have special meaning in batch: & | < > ^ % " ` $ +/** Validates a task name is safe for embedding in Windows restart scripts. */ +function isWindowsTaskNameSafe(value: string): boolean { return /^[A-Za-z0-9 _\-().]+$/.test(value); } +function powerShellSingleQuote(value: string): string { + return `'${value.replace(/'/g, "''")}'`; +} + function resolveSystemdUnit(env: NodeJS.ProcessEnv): string { const override = normalizeOptionalString(env.OPENCLAW_SYSTEMD_UNIT); if (override) { @@ -138,45 +141,189 @@ exit "$status" `; } else if (platform === "win32") { const taskName = resolveWindowsTaskName(env); - if (!isBatchSafe(taskName)) { + if (!isWindowsTaskNameSafe(taskName)) { return null; } const port = Number.isFinite(gatewayPort) && gatewayPort > 0 ? gatewayPort : DEFAULT_GATEWAY_PORT; - const restartLog = renderCmdRestartLogSetup({ ...process.env, ...env }); - filename = `openclaw-restart-${timestamp}.bat`; + const restartLogPath = resolveGatewayRestartLogPath({ ...process.env, ...env }); + const quotedLogPath = powerShellSingleQuote(restartLogPath); + const quotedTaskName = powerShellSingleQuote(taskName); + filename = `openclaw-restart-${timestamp}.cmd`; scriptContent = `@echo off -REM Standalone restart script — survives parent process termination. -REM Wait briefly to ensure file locks are released after update. 
-timeout /t 2 /nobreak >nul -${restartLog.lines.join("\r\n")} ->> ${restartLog.quotedLogPath} 2>&1 echo [%DATE% %TIME%] openclaw restart attempt source=update target=${taskName} -schtasks /End /TN "${taskName}" >> ${restartLog.quotedLogPath} 2>&1 -REM Poll for gateway port release before rerun; force-kill listener if stuck. -set /a attempts=0 -:wait_for_port_release -set /a attempts+=1 -netstat -ano | findstr /R /C:":${port} .*LISTENING" >nul -if errorlevel 1 goto port_released -if %attempts% GEQ 10 goto force_kill_listener -timeout /t 1 /nobreak >nul -goto wait_for_port_release -:force_kill_listener -for /f "tokens=5" %%P in ('netstat -ano ^| findstr /R /C:":${port} .*LISTENING"') do ( - taskkill /F /PID %%P >> ${restartLog.quotedLogPath} 2>&1 - goto port_released -) -:port_released -schtasks /Run /TN "${taskName}" >> ${restartLog.quotedLogPath} 2>&1 +REM Standalone restart script - survives parent process termination. +REM Keep this as a cmd wrapper so Group Policy script execution policies +REM cannot block the update restart handoff before schtasks.exe runs. +setlocal +set "OPENCLAW_RESTART_SCRIPT=%~f0" +powershell -NoProfile -ExecutionPolicy Bypass -Command "$p=$env:OPENCLAW_RESTART_SCRIPT; $s=Get-Content -Raw -LiteralPath $p; $m='# POWERSHELL'; $i=$s.IndexOf($m); if ($i -lt 0) { exit 1 }; Invoke-Expression $s.Substring($i)" set "status=%ERRORLEVEL%" -if not "%status%"=="0" ( - >> ${restartLog.quotedLogPath} 2>&1 echo [%DATE% %TIME%] openclaw restart failed source=update status=%status% -) else ( - >> ${restartLog.quotedLogPath} 2>&1 echo [%DATE% %TIME%] openclaw restart done source=update -) -REM Self-cleanup -del "%~f0" +del "%~f0" >nul 2>&1 exit /b %status% +# POWERSHELL +# Wait briefly to ensure file locks are released after update. 
+$ErrorActionPreference = "Continue" +Start-Sleep -Seconds 2 + +$logPath = ${quotedLogPath} +try { + $logDir = Split-Path -Parent $logPath + New-Item -ItemType Directory -Path $logDir -Force | Out-Null + Add-Content -LiteralPath $logPath -Value "[$(Get-Date -Format o)] openclaw restart log initialized" +} catch { + # Restart should still run if log setup is unavailable. +} + +function Write-RestartLog { + param([string]$Message) + try { + Add-Content -LiteralPath $logPath -Value "[$(Get-Date -Format o)] $Message" + } catch { + } +} + +function Join-OpenClawProcessArguments { + param([string[]]$Arguments) + ($Arguments | ForEach-Object { + if ($_ -match "\\s") { + '"' + $_ + '"' + } else { + $_ + } + }) -join " " +} + +function Invoke-OpenClawSchtasksWithTimeout { + param( + [string[]]$Arguments, + [int]$TimeoutSeconds + ) + $process = $null + try { + $startInfo = [System.Diagnostics.ProcessStartInfo]::new() + $startInfo.FileName = "schtasks.exe" + $startInfo.Arguments = Join-OpenClawProcessArguments -Arguments $Arguments + $startInfo.UseShellExecute = $false + $startInfo.RedirectStandardOutput = $true + $startInfo.RedirectStandardError = $true + $process = [System.Diagnostics.Process]::Start($startInfo) + if (-not $process.WaitForExit($TimeoutSeconds * 1000)) { + try { + $process.Kill() + } catch { + } + Write-RestartLog "openclaw restart schtasks timeout source=update args=$($Arguments -join ' ')" + return 124 + } + $stdout = $process.StandardOutput.ReadToEnd() + $stderr = $process.StandardError.ReadToEnd() + if ($stdout) { + Write-RestartLog $stdout.Trim() + } + if ($stderr) { + Write-RestartLog $stderr.Trim() + } + return $process.ExitCode + } catch { + Write-RestartLog "openclaw restart schtasks failed source=update args=$($Arguments -join ' ') error=$($_.Exception.Message)" + return 1 + } +} + +function Get-OpenClawScheduledTaskState { + param([string]$TaskName) + try { + $task = Get-ScheduledTask -TaskName $TaskName -ErrorAction Stop + if ($task -and 
$task.State) { + return [string]$task.State + } + } catch { + } + + try { + $queryOutput = & schtasks.exe /Query /TN $TaskName /FO LIST 2>$null + foreach ($line in $queryOutput) { + if ($line -match "^\\s*Status:\\s*(.+?)\\s*$") { + return $Matches[1] + } + } + } catch { + } + + return "Unknown" +} + +function Get-OpenClawListenerPids { + param([int]$Port) + $listenerPids = @() + + try { + if (Get-Command Get-NetTCPConnection -ErrorAction SilentlyContinue) { + $listenerPids += Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue | + ForEach-Object { [int]$_.OwningProcess } + } + } catch { + } + + if ($listenerPids.Count -eq 0) { + try { + $portPattern = [regex]::Escape(":$Port") + $linePattern = "^\\s*TCP\\s+\\S+$portPattern\\s+\\S+\\s+LISTENING\\s+(\\d+)\\s*$" + & netstat.exe -ano -p tcp 2>$null | ForEach-Object { + if ($_ -match $linePattern) { + $listenerPids += [int]$Matches[1] + } + } + } catch { + } + } + + $listenerPids | Sort-Object -Unique +} + +$taskName = ${quotedTaskName} +$port = ${port} +Write-RestartLog "openclaw restart attempt source=update target=$taskName" + +$taskState = Get-OpenClawScheduledTaskState -TaskName $taskName +if ($taskState -eq "Running") { + $endStatus = Invoke-OpenClawSchtasksWithTimeout -Arguments @("/End", "/TN", $taskName) -TimeoutSeconds 10 + if ($endStatus -ne 0) { + Write-RestartLog "openclaw restart schtasks end did not complete cleanly source=update status=$endStatus" + } +} else { + Write-RestartLog "openclaw restart skipped schtasks end source=update state=$taskState" +} + +for ($attempt = 1; $attempt -le 10; $attempt++) { + $listeners = @(Get-OpenClawListenerPids -Port $port) + if ($listeners.Count -eq 0) { + break + } + + if ($attempt -eq 10) { + foreach ($listenerPid in $listeners) { + try { + Stop-Process -Id $listenerPid -Force -ErrorAction Stop + Write-RestartLog "openclaw restart killed stale listener source=update pid=$listenerPid" + } catch { + Write-RestartLog "openclaw restart 
failed to kill stale listener source=update pid=$listenerPid error=$($_.Exception.Message)" + } + } + break + } + + Start-Sleep -Seconds 1 +} + +$status = Invoke-OpenClawSchtasksWithTimeout -Arguments @("/Run", "/TN", $taskName) -TimeoutSeconds 30 +if ($status -eq 0) { + Write-RestartLog "openclaw restart done source=update" +} else { + Write-RestartLog "openclaw restart failed source=update status=$status" +} + +exit $status `; } else { return null;