fix(update): bound Windows scheduled task stop

This commit is contained in:
Peter Steinberger
2026-04-26 07:56:39 +01:00
parent 3ad29972d0
commit d1e5f4bd3c
3 changed files with 240 additions and 77 deletions

View File

@@ -76,6 +76,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- CLI/update: guard Windows scheduled-task stops by state and timeout so auto-update restart cannot hang indefinitely on `schtasks /End` before stale-listener cleanup. Fixes #69970. Thanks @yangswld and @sherlock-huang.
- Gateway/install: refresh loaded gateway service installs when the current service embeds stale gateway auth instead of returning already-installed, avoiding LaunchAgent token-mismatch loops after token rotation. Fixes #70752. Thanks @hyspacex.
- Update: ignore bundled plugin `.openclaw-install-stage` directories during global install verification and packaged dist pruning so leftover runtime-dep staging files do not turn successful updates into `unexpected packaged dist file` failures. Fixes #71752. Thanks @waynegault.
- Node runtime: keep node-host retry timers alive across Gateway restarts and exit on terminal credential pauses so supervised nodes do not become silent zombies. Fixes #69800. Thanks @meroli28.

View File

@@ -69,36 +69,44 @@ exit 0
}
/**
 * Asserts that the Windows restart-script markers declared below occur in
 * `content` in sequence: every marker is located with `indexOf`, starting the
 * search at the index of the marker before it, and must be found strictly
 * after that marker. It also asserts that a set of strings does not occur
 * anywhere in `content`.
 *
 * @param content Text to inspect; the callers pass the generated script.
 * @param port Port number interpolated into the port-related markers
 *   (defaults to 18789).
 */
function expectWindowsRestartWaitOrdering(content: string, port = 18789) {
const endCommand = 'schtasks /End /TN "';
const pollAttemptsInit = "set /a attempts=0";
const pollLabel = ":wait_for_port_release";
const pollAttemptIncrement = "set /a attempts+=1";
const pollNetstatCheck = `netstat -ano | findstr /R /C:":${port} .*LISTENING" >nul`;
const forceKillLabel = ":force_kill_listener";
const forceKillCommand = "taskkill /F /PID %%P >>";
const portReleasedLabel = ":port_released";
const runCommand = 'schtasks /Run /TN "';
const endIndex = content.indexOf(endCommand);
const attemptsInitIndex = content.indexOf(pollAttemptsInit, endIndex);
const pollLabelIndex = content.indexOf(pollLabel, attemptsInitIndex);
const pollAttemptIncrementIndex = content.indexOf(pollAttemptIncrement, pollLabelIndex);
const pollNetstatCheckIndex = content.indexOf(pollNetstatCheck, pollAttemptIncrementIndex);
const forceKillLabelIndex = content.indexOf(forceKillLabel, pollNetstatCheckIndex);
const forceKillCommandIndex = content.indexOf(forceKillCommand, forceKillLabelIndex);
const portReleasedLabelIndex = content.indexOf(portReleasedLabel, forceKillCommandIndex);
const runIndex = content.indexOf(runCommand, portReleasedLabelIndex);
const stateCheck = "$taskState = Get-OpenClawScheduledTaskState -TaskName $taskName";
const runningGuard = 'if ($taskState -eq "Running")';
const endCommand =
'Invoke-OpenClawSchtasksWithTimeout -Arguments @("/End", "/TN", $taskName) -TimeoutSeconds 10';
const skipEndLog = "openclaw restart skipped schtasks end";
const pollLoop = "for ($attempt = 1; $attempt -le 10; $attempt++)";
const pollCall = `Get-OpenClawListenerPids -Port $port`;
const forceKillBranch = "if ($attempt -eq 10)";
const forceKillCommand = "Stop-Process -Id $listenerPid -Force";
const runCommand =
'Invoke-OpenClawSchtasksWithTimeout -Arguments @("/Run", "/TN", $taskName) -TimeoutSeconds 30';
const portAssignment = `$port = ${port}`;
const stateCheckIndex = content.indexOf(stateCheck);
const runningGuardIndex = content.indexOf(runningGuard, stateCheckIndex);
const endIndex = content.indexOf(endCommand, runningGuardIndex);
const skipEndLogIndex = content.indexOf(skipEndLog, endIndex);
const portAssignmentIndex = content.indexOf(portAssignment);
const pollLoopIndex = content.indexOf(pollLoop, skipEndLogIndex);
const pollCallIndex = content.indexOf(pollCall, pollLoopIndex);
const forceKillBranchIndex = content.indexOf(forceKillBranch, pollCallIndex);
const forceKillCommandIndex = content.indexOf(forceKillCommand, forceKillBranchIndex);
const runIndex = content.indexOf(runCommand, forceKillCommandIndex);
expect(endIndex).toBeGreaterThanOrEqual(0);
expect(attemptsInitIndex).toBeGreaterThan(endIndex);
expect(pollLabelIndex).toBeGreaterThan(attemptsInitIndex);
expect(pollAttemptIncrementIndex).toBeGreaterThan(pollLabelIndex);
expect(pollNetstatCheckIndex).toBeGreaterThan(pollAttemptIncrementIndex);
expect(forceKillLabelIndex).toBeGreaterThan(pollNetstatCheckIndex);
expect(forceKillCommandIndex).toBeGreaterThan(forceKillLabelIndex);
expect(portReleasedLabelIndex).toBeGreaterThan(forceKillCommandIndex);
expect(runIndex).toBeGreaterThan(portReleasedLabelIndex);
expect(stateCheckIndex).toBeGreaterThanOrEqual(0);
expect(runningGuardIndex).toBeGreaterThan(stateCheckIndex);
expect(endIndex).toBeGreaterThan(runningGuardIndex);
expect(skipEndLogIndex).toBeGreaterThan(endIndex);
// Presence check only: the port assignment is searched from the start of
// `content` and is not part of the ordering chain.
expect(portAssignmentIndex).toBeGreaterThanOrEqual(0);
expect(pollLoopIndex).toBeGreaterThan(skipEndLogIndex);
expect(pollCallIndex).toBeGreaterThan(pollLoopIndex);
expect(forceKillBranchIndex).toBeGreaterThan(pollCallIndex);
expect(forceKillCommandIndex).toBeGreaterThan(forceKillBranchIndex);
expect(runIndex).toBeGreaterThan(forceKillCommandIndex);
// Strings that must not appear anywhere in `content`.
expect(content).not.toContain("timeout /t 3 /nobreak >nul");
expect(content).not.toContain("findstr");
expect(content).not.toContain("netstat -ano |");
expect(content).not.toContain("schtasks /End /TN");
}
beforeEach(() => {
@@ -296,21 +304,25 @@ exit 0
await cleanupScript(scriptPath);
});
it("creates a schtasks restart script on Windows", async () => {
it("creates a guarded schtasks restart script on Windows", async () => {
Object.defineProperty(process, "platform", { value: "win32" });
const { scriptPath, content } = await prepareAndReadScript({
OPENCLAW_PROFILE: "default",
});
expect(scriptPath.endsWith(".bat")).toBe(true);
expect(scriptPath.endsWith(".cmd")).toBe(true);
expect(content).toContain("@echo off");
expect(content).toContain("powershell -NoProfile -ExecutionPolicy Bypass -Command");
expect(content).not.toContain("-File");
expect(content).toContain('$ErrorActionPreference = "Continue"');
expect(content).toContain("gateway-restart.log");
expect(content).toContain("openclaw restart attempt source=update target=OpenClaw Gateway");
expect(content).toContain('schtasks /End /TN "OpenClaw Gateway"');
expect(content).toContain('schtasks /Run /TN "OpenClaw Gateway" >>');
expect(content).toContain("$taskName = 'OpenClaw Gateway'");
expect(content).toContain("function Invoke-OpenClawSchtasksWithTimeout");
expect(content).toContain("function Get-OpenClawScheduledTaskState");
expect(content).toContain("Get-ScheduledTask -TaskName $TaskName");
expect(content).toContain("openclaw restart skipped schtasks end");
expectWindowsRestartWaitOrdering(content);
// Batch self-cleanup
expect(content).toContain('del "%~f0"');
expect(content).toContain('del "%~f0" >nul 2>&1');
await cleanupScript(scriptPath);
});
@@ -321,8 +333,11 @@ exit 0
OPENCLAW_PROFILE: "default",
OPENCLAW_WINDOWS_TASK_NAME: "OpenClaw Gateway (custom)",
});
expect(content).toContain('schtasks /End /TN "OpenClaw Gateway (custom)"');
expect(content).toContain('schtasks /Run /TN "OpenClaw Gateway (custom)"');
expect(content).toContain("$taskName = 'OpenClaw Gateway (custom)'");
expect(content).toContain("Get-OpenClawScheduledTaskState -TaskName $taskName");
expect(content).toContain(
'Invoke-OpenClawSchtasksWithTimeout -Arguments @("/End", "/TN", $taskName) -TimeoutSeconds 10',
);
expectWindowsRestartWaitOrdering(content);
await cleanupScript(scriptPath);
});
@@ -337,10 +352,10 @@ exit 0
},
customPort,
);
expect(content).toContain(`netstat -ano | findstr /R /C:":${customPort} .*LISTENING" >nul`);
expect(content).toContain(
`for /f "tokens=5" %%P in ('netstat -ano ^| findstr /R /C:":${customPort} .*LISTENING"') do (`,
);
expect(content).toContain(`$port = ${customPort}`);
expect(content).toContain("Get-NetTCPConnection -LocalPort $Port -State Listen");
expect(content).toContain("& netstat.exe -ano -p tcp");
expect(content).not.toContain("findstr");
expectWindowsRestartWaitOrdering(content, customPort);
await cleanupScript(scriptPath);
});
@@ -371,7 +386,7 @@ exit 0
const { scriptPath, content } = await prepareAndReadScript({
OPENCLAW_PROFILE: "production",
});
expect(content).toContain('schtasks /End /TN "OpenClaw Gateway (production)"');
expect(content).toContain("$taskName = 'OpenClaw Gateway (production)'");
expectWindowsRestartWaitOrdering(content);
await cleanupScript(scriptPath);
});

View File

@@ -10,8 +10,8 @@ import {
resolveGatewayWindowsTaskName,
} from "../../daemon/constants.js";
import {
renderCmdRestartLogSetup,
renderPosixRestartLogSetup,
resolveGatewayRestartLogPath,
shellEscapeRestartLogValue,
} from "../../daemon/restart-logs.js";
import { normalizeOptionalString } from "../../shared/string-coerce.js";
@@ -25,12 +25,15 @@ function shellEscape(value: string): string {
return value.replace(/'/g, "'\\''");
}
/** Validates a string is safe for embedding in a batch (cmd.exe) script. */
function isBatchSafe(value: string): boolean {
// Reject characters that have special meaning in batch: & | < > ^ % " ` $
/**
 * Reports whether a task name may be embedded in a Windows restart script.
 *
 * Only non-empty names consisting solely of ASCII letters, digits, spaces,
 * underscores, hyphens, parentheses, and dots are accepted.
 */
function isWindowsTaskNameSafe(value: string): boolean {
  const allowedTaskName = /^[A-Za-z0-9 _\-().]+$/;
  return allowedTaskName.test(value);
}
/**
 * Wraps a value in a PowerShell single-quoted string literal.
 *
 * PowerShell accepts the typographic quotes U+2018, U+2019, U+201A and U+201B
 * as single-quote delimiters in addition to the ASCII apostrophe, so every one
 * of them is doubled. Escaping only the ASCII apostrophe would let a value
 * such as a profile path containing a curly apostrophe end the literal early.
 *
 * @param value Raw text to embed.
 * @returns The text wrapped in single quotes with embedded quote characters doubled.
 */
function powerShellSingleQuote(value: string): string {
  // "$&$&" emits the matched quote character twice.
  return `'${value.replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&")}'`;
}
function resolveSystemdUnit(env: NodeJS.ProcessEnv): string {
const override = normalizeOptionalString(env.OPENCLAW_SYSTEMD_UNIT);
if (override) {
@@ -138,45 +141,189 @@ exit "$status"
`;
} else if (platform === "win32") {
const taskName = resolveWindowsTaskName(env);
if (!isBatchSafe(taskName)) {
if (!isWindowsTaskNameSafe(taskName)) {
return null;
}
const port =
Number.isFinite(gatewayPort) && gatewayPort > 0 ? gatewayPort : DEFAULT_GATEWAY_PORT;
const restartLog = renderCmdRestartLogSetup({ ...process.env, ...env });
filename = `openclaw-restart-${timestamp}.bat`;
const restartLogPath = resolveGatewayRestartLogPath({ ...process.env, ...env });
const quotedLogPath = powerShellSingleQuote(restartLogPath);
const quotedTaskName = powerShellSingleQuote(taskName);
filename = `openclaw-restart-${timestamp}.cmd`;
scriptContent = `@echo off
REM Standalone restart script survives parent process termination.
REM Wait briefly to ensure file locks are released after update.
timeout /t 2 /nobreak >nul
${restartLog.lines.join("\r\n")}
>> ${restartLog.quotedLogPath} 2>&1 echo [%DATE% %TIME%] openclaw restart attempt source=update target=${taskName}
schtasks /End /TN "${taskName}" >> ${restartLog.quotedLogPath} 2>&1
REM Poll for gateway port release before rerun; force-kill listener if stuck.
set /a attempts=0
:wait_for_port_release
set /a attempts+=1
netstat -ano | findstr /R /C:":${port} .*LISTENING" >nul
if errorlevel 1 goto port_released
if %attempts% GEQ 10 goto force_kill_listener
timeout /t 1 /nobreak >nul
goto wait_for_port_release
:force_kill_listener
for /f "tokens=5" %%P in ('netstat -ano ^| findstr /R /C:":${port} .*LISTENING"') do (
taskkill /F /PID %%P >> ${restartLog.quotedLogPath} 2>&1
goto port_released
)
:port_released
schtasks /Run /TN "${taskName}" >> ${restartLog.quotedLogPath} 2>&1
REM Standalone restart script - survives parent process termination.
REM Keep this as a cmd wrapper so Group Policy script execution policies
REM cannot block the update restart handoff before schtasks.exe runs.
setlocal
set "OPENCLAW_RESTART_SCRIPT=%~f0"
REM The PowerShell payload begins at the marker line further down this script.
REM The marker is assembled from two pieces so that this command line does not
REM contain the marker text itself; otherwise IndexOf would match here first
REM and the remaining cmd lines would be evaluated as PowerShell.
powershell -NoProfile -ExecutionPolicy Bypass -Command "$p=$env:OPENCLAW_RESTART_SCRIPT; $s=Get-Content -Raw -LiteralPath $p; $m='#'+' POWERSHELL'; $i=$s.IndexOf($m); if ($i -lt 0) { exit 1 }; Invoke-Expression $s.Substring($i)"
set "status=%ERRORLEVEL%"
if not "%status%"=="0" (
>> ${restartLog.quotedLogPath} 2>&1 echo [%DATE% %TIME%] openclaw restart failed source=update status=%status%
) else (
>> ${restartLog.quotedLogPath} 2>&1 echo [%DATE% %TIME%] openclaw restart done source=update
)
REM Self-cleanup
del "%~f0"
del "%~f0" >nul 2>&1
exit /b %status%
# POWERSHELL
# Wait briefly to ensure file locks are released after update.
$ErrorActionPreference = "Continue"
Start-Sleep -Seconds 2
$logPath = ${quotedLogPath}
try {
$logDir = Split-Path -Parent $logPath
New-Item -ItemType Directory -Path $logDir -Force | Out-Null
Add-Content -LiteralPath $logPath -Value "[$(Get-Date -Format o)] openclaw restart log initialized"
} catch {
# Restart should still run if log setup is unavailable.
}
function Write-RestartLog {
param([string]$Message)
try {
Add-Content -LiteralPath $logPath -Value "[$(Get-Date -Format o)] $Message"
} catch {
}
}
function Join-OpenClawProcessArguments {
param([string[]]$Arguments)
($Arguments | ForEach-Object {
if ($_ -match "\\s") {
'"' + $_ + '"'
} else {
$_
}
}) -join " "
}
function Invoke-OpenClawSchtasksWithTimeout {
# Runs schtasks.exe with the given arguments and waits at most TimeoutSeconds
# for it to exit. Returns the schtasks exit code, 124 when the wait timed out
# (the process is then killed), or 1 when the process could not be run.
# Captured stdout and stderr are appended to the restart log.
param(
[string[]]$Arguments,
[int]$TimeoutSeconds
)
$process = $null
try {
$startInfo = [System.Diagnostics.ProcessStartInfo]::new()
$startInfo.FileName = "schtasks.exe"
$startInfo.Arguments = Join-OpenClawProcessArguments -Arguments $Arguments
$startInfo.UseShellExecute = $false
$startInfo.RedirectStandardOutput = $true
$startInfo.RedirectStandardError = $true
$process = [System.Diagnostics.Process]::Start($startInfo)
# Start draining both pipes before waiting: reading them synchronously after
# WaitForExit can deadlock once the child fills a redirected pipe.
$stdoutTask = $process.StandardOutput.ReadToEndAsync()
$stderrTask = $process.StandardError.ReadToEndAsync()
if (-not $process.WaitForExit($TimeoutSeconds * 1000)) {
try {
$process.Kill()
} catch {
}
Write-RestartLog "openclaw restart schtasks timeout source=update args=$($Arguments -join ' ')"
return 124
}
foreach ($outputTask in @($stdoutTask, $stderrTask)) {
try {
# Bounded wait so collecting output can never outlast the timeout by much.
if ($outputTask.Wait(2000) -and $outputTask.Result) {
Write-RestartLog $outputTask.Result.Trim()
}
} catch {
# Output capture is best-effort; the exit code below is what matters.
}
}
return $process.ExitCode
} catch {
Write-RestartLog "openclaw restart schtasks failed source=update args=$($Arguments -join ' ') error=$($_.Exception.Message)"
return 1
} finally {
# Release the process handle and redirected pipes on every path.
if ($process) {
$process.Dispose()
}
}
}
function Get-OpenClawScheduledTaskState {
param([string]$TaskName)
try {
$task = Get-ScheduledTask -TaskName $TaskName -ErrorAction Stop
if ($task -and $task.State) {
return [string]$task.State
}
} catch {
}
try {
$queryOutput = & schtasks.exe /Query /TN $TaskName /FO LIST 2>$null
foreach ($line in $queryOutput) {
if ($line -match "^\\s*Status:\\s*(.+?)\\s*$") {
return $Matches[1]
}
}
} catch {
}
return "Unknown"
}
function Get-OpenClawListenerPids {
param([int]$Port)
$listenerPids = @()
try {
if (Get-Command Get-NetTCPConnection -ErrorAction SilentlyContinue) {
$listenerPids += Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue |
ForEach-Object { [int]$_.OwningProcess }
}
} catch {
}
if ($listenerPids.Count -eq 0) {
try {
$portPattern = [regex]::Escape(":$Port")
$linePattern = "^\\s*TCP\\s+\\S+$portPattern\\s+\\S+\\s+LISTENING\\s+(\\d+)\\s*$"
& netstat.exe -ano -p tcp 2>$null | ForEach-Object {
if ($_ -match $linePattern) {
$listenerPids += [int]$Matches[1]
}
}
} catch {
}
}
$listenerPids | Sort-Object -Unique
}
$taskName = ${quotedTaskName}
$port = ${port}
Write-RestartLog "openclaw restart attempt source=update target=$taskName"
$taskState = Get-OpenClawScheduledTaskState -TaskName $taskName
if ($taskState -eq "Running") {
$endStatus = Invoke-OpenClawSchtasksWithTimeout -Arguments @("/End", "/TN", $taskName) -TimeoutSeconds 10
if ($endStatus -ne 0) {
Write-RestartLog "openclaw restart schtasks end did not complete cleanly source=update status=$endStatus"
}
} else {
Write-RestartLog "openclaw restart skipped schtasks end source=update state=$taskState"
}
for ($attempt = 1; $attempt -le 10; $attempt++) {
$listeners = @(Get-OpenClawListenerPids -Port $port)
if ($listeners.Count -eq 0) {
break
}
if ($attempt -eq 10) {
foreach ($listenerPid in $listeners) {
try {
Stop-Process -Id $listenerPid -Force -ErrorAction Stop
Write-RestartLog "openclaw restart killed stale listener source=update pid=$listenerPid"
} catch {
Write-RestartLog "openclaw restart failed to kill stale listener source=update pid=$listenerPid error=$($_.Exception.Message)"
}
}
break
}
Start-Sleep -Seconds 1
}
$status = Invoke-OpenClawSchtasksWithTimeout -Arguments @("/Run", "/TN", $taskName) -TimeoutSeconds 30
if ($status -eq 0) {
Write-RestartLog "openclaw restart done source=update"
} else {
Write-RestartLog "openclaw restart failed source=update status=$status"
}
exit $status
`;
} else {
return null;