diff --git a/CHANGELOG.md b/CHANGELOG.md index dbeb0470988..1bc3560cc0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -148,6 +148,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Sandbox/security: block credential-path binds even when sandbox home paths resolve through canonical aliases, so agent containers cannot mount user secret stores through alternate home-directory paths. (#59157) Thanks @eleqtrizit. +- Gateway/Windows scheduled tasks: preserve Task Scheduler settings on reinstall, fail loud when Scheduled Task `/Run` does not start, and report fast failed restarts with the actual elapsed time instead of a fake 60s timeout. (#59335) Thanks @tmimmanuel. ## 2026.4.1-beta.1 diff --git a/src/cli/daemon-cli/lifecycle.test.ts b/src/cli/daemon-cli/lifecycle.test.ts index ad4ef1f7c3b..0835e77e2e6 100644 --- a/src/cli/daemon-cli/lifecycle.test.ts +++ b/src/cli/daemon-cli/lifecycle.test.ts @@ -6,6 +6,8 @@ type RestartHealthSnapshot = { staleGatewayPids: number[]; runtime: { status?: string }; portUsage: { port: number; status: string; listeners: []; hints: []; errors?: string[] }; + waitOutcome?: string; + elapsedMs?: number; }; type RestartPostCheckContext = { @@ -32,7 +34,7 @@ const waitForGatewayHealthyRestart = vi.fn(); const terminateStaleGatewayPids = vi.fn(); const renderGatewayPortHealthDiagnostics = vi.fn(() => ["diag: unhealthy port"]); const renderRestartDiagnostics = vi.fn(() => ["diag: unhealthy runtime"]); -const resolveGatewayPort = vi.fn(() => 18789); +const resolveGatewayPort = vi.hoisted(() => vi.fn((_cfg?: unknown, _env?: unknown) => 18789)); const findVerifiedGatewayListenerPidsOnPortSync = vi.fn<(port: number) => number[]>(() => []); const signalVerifiedGatewayPidSync = vi.fn<(pid: number, signal: "SIGTERM" | "SIGUSR1") => void>(); const formatGatewayPidList = vi.fn<(pids: number[]) => string>((pids) => pids.join(", ")); @@ -47,12 +49,12 @@ const probeGateway = vi.fn< }> >(); const isRestartEnabled = vi.fn<(config?: { commands?: unknown }) => boolean>(() => true); -const loadConfig = vi.fn(() => ({})); +const loadConfig = vi.hoisted(() => vi.fn(() => ({}))); vi.mock("../../config/config.js", () => ({ loadConfig: () => loadConfig(), readBestEffortConfig: async () => loadConfig(), - resolveGatewayPort, + resolveGatewayPort: (cfg?: unknown, env?: unknown) => resolveGatewayPort(cfg, env), })); vi.mock("../../infra/gateway-processes.js", () => ({ @@ -230,13 +232,15 @@ describe("runDaemonRestart health checks", () => { expect(waitForGatewayHealthyRestart).toHaveBeenCalledTimes(1); }); - it("fails restart when gateway remains unhealthy", async () => { + it("fails restart when gateway remains unhealthy after the full timeout", async () => { const { formatCliCommand } = await import("../command-format.js"); const unhealthy: RestartHealthSnapshot = { healthy: false, staleGatewayPids: [], runtime: { status: "stopped" }, portUsage: { port: 18789, status: "free", listeners: [], hints: [] }, + waitOutcome: "timeout", + elapsedMs: 60_000, }; waitForGatewayHealthyRestart.mockResolvedValue(unhealthy); @@ -251,6 +255,30 @@ describe("runDaemonRestart health checks", () => { expect(renderRestartDiagnostics).toHaveBeenCalledTimes(1); }); + it("fails restart with a stopped-free message when the waiter exits early", async () => { + const { formatCliCommand } = await import("../command-format.js"); + const unhealthy: RestartHealthSnapshot = { + healthy: false, + staleGatewayPids: [], + runtime: { status: "stopped" }, + portUsage: { port: 18789, status: "free", listeners: [], hints: [] }, + waitOutcome: "stopped-free", + elapsedMs: 12_500, + }; + waitForGatewayHealthyRestart.mockResolvedValue(unhealthy); + + await expect(runDaemonRestart({ json: true })).rejects.toMatchObject({ + message: + "Gateway restart failed after 13s: service stayed stopped and health checks never came up.", + hints: [ + formatCliCommand("openclaw gateway status --deep"), + formatCliCommand("openclaw doctor"), + ], + }); + expect(terminateStaleGatewayPids).not.toHaveBeenCalled(); + expect(renderRestartDiagnostics).toHaveBeenCalledTimes(1); + }); + it("signals an unmanaged gateway process on stop", async () => { findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([4200, 4200, 4300]); runServiceStop.mockImplementation(async (params: { onNotLoaded?: () => Promise }) => { diff --git a/src/cli/daemon-cli/lifecycle.ts b/src/cli/daemon-cli/lifecycle.ts index d3e01f66412..f0d7efda940 100644 --- a/src/cli/daemon-cli/lifecycle.ts +++ b/src/cli/daemon-cli/lifecycle.ts @@ -19,6 +19,7 @@ import { import { DEFAULT_RESTART_HEALTH_ATTEMPTS, DEFAULT_RESTART_HEALTH_DELAY_MS, + type GatewayRestartSnapshot, renderGatewayPortHealthDiagnostics, renderRestartDiagnostics, terminateStaleGatewayPids, @@ -31,6 +32,25 @@ import type { DaemonLifecycleOptions } from "./types.js"; const POST_RESTART_HEALTH_ATTEMPTS = DEFAULT_RESTART_HEALTH_ATTEMPTS; const POST_RESTART_HEALTH_DELAY_MS = DEFAULT_RESTART_HEALTH_DELAY_MS; +function formatRestartFailure(params: { + health: GatewayRestartSnapshot; + port: number; + timeoutSeconds: number; +}): { statusLine: string; failMessage: string } { + if (params.health.waitOutcome === "stopped-free") { + const elapsedSeconds = Math.max(1, Math.round((params.health.elapsedMs ?? 0) / 1000)); + return { + statusLine: `Gateway restart failed after ${elapsedSeconds}s: service stayed stopped and port ${params.port} stayed free.`, + failMessage: `Gateway restart failed after ${elapsedSeconds}s: service stayed stopped and health checks never came up.`, + }; + } + + return { + statusLine: `Timed out after ${params.timeoutSeconds}s waiting for gateway port ${params.port} to become healthy.`, + failMessage: `Gateway restart timed out after ${params.timeoutSeconds}s waiting for health checks.`, + }; +} + async function resolveGatewayLifecyclePort(service = resolveGatewayService()) { const command = await service.readCommand(process.env).catch(() => null); const serviceEnv = command?.environment ?? undefined; @@ -234,13 +254,17 @@ export async function runDaemonRestart(opts: DaemonLifecycleOptions = {}): Promi } const diagnostics = renderRestartDiagnostics(health); - const timeoutLine = `Timed out after ${restartWaitSeconds}s waiting for gateway port ${restartPort} to become healthy.`; + const failure = formatRestartFailure({ + health, + port: restartPort, + timeoutSeconds: restartWaitSeconds, + }); const runningNoPortLine = health.runtime.status === "running" && health.portUsage.status === "free" ? `Gateway process is running but port ${restartPort} is still free (startup hang/crash loop or very slow VM startup).` : null; if (!json) { - defaultRuntime.log(theme.warn(timeoutLine)); + defaultRuntime.log(theme.warn(failure.statusLine)); if (runningNoPortLine) { defaultRuntime.log(theme.warn(runningNoPortLine)); } @@ -248,14 +272,14 @@ export async function runDaemonRestart(opts: DaemonLifecycleOptions = {}): Promi defaultRuntime.log(theme.muted(line)); } } else { - warnings.push(timeoutLine); + warnings.push(failure.statusLine); if (runningNoPortLine) { warnings.push(runningNoPortLine); } warnings.push(...diagnostics); } - fail(`Gateway restart timed out after ${restartWaitSeconds}s waiting for health checks.`, [ + fail(failure.failMessage, [ formatCliCommand("openclaw gateway status --deep"), formatCliCommand("openclaw doctor"), ]); diff --git a/src/cli/daemon-cli/restart-health.test.ts b/src/cli/daemon-cli/restart-health.test.ts index c4b8eb3b07c..24595639408 100644 --- a/src/cli/daemon-cli/restart-health.test.ts +++ b/src/cli/daemon-cli/restart-health.test.ts @@ -3,6 +3,7 @@ import type { GatewayService } from "../../daemon/service.js"; import type { PortListenerKind, PortUsage } from "../../infra/ports.js"; const inspectPortUsage = vi.hoisted(() => vi.fn<(port: number) => Promise>()); +const sleep = vi.hoisted(() => vi.fn(async (_ms: number) => {})); const classifyPortListener = vi.hoisted(() => vi.fn<(_listener: unknown, _port: number) => PortListenerKind>(() => "gateway"), ); @@ -18,6 +19,14 @@ vi.mock("../../gateway/probe.js", () => ({ probeGateway: (opts: unknown) => probeGateway(opts), })); +vi.mock("../../utils.js", async () => { + const actual = await vi.importActual("../../utils.js"); + return { + ...actual, + sleep: (ms: number) => sleep(ms), + }; +}); + const originalPlatform = process.platform; function makeGatewayService( @@ -88,6 +97,7 @@ describe("inspectGatewayRestart", () => { listeners: [], hints: [], }); + sleep.mockReset(); classifyPortListener.mockReset(); classifyPortListener.mockReturnValue("gateway"); probeGateway.mockReset(); @@ -240,4 +250,58 @@ describe("inspectGatewayRestart", () => { expect(snapshot.healthy).toBe(true); expect(probeGateway).not.toHaveBeenCalled(); }); + + it("annotates stopped-free early exits with the actual elapsed time", async () => { + const service = makeGatewayService({ status: "stopped" }); + inspectPortUsage.mockResolvedValue({ + port: 18789, + status: "free", + listeners: [], + hints: [], + }); + + const { waitForGatewayHealthyRestart } = await import("./restart-health.js"); + const snapshot = await waitForGatewayHealthyRestart({ + service, + port: 18789, + attempts: 120, + delayMs: 500, + }); + + expect(snapshot).toMatchObject({ + healthy: false, + runtime: { status: "stopped" }, + portUsage: { status: "free" }, + waitOutcome: "stopped-free", + elapsedMs: 12_500, + }); + expect(sleep).toHaveBeenCalledTimes(25); + }); + + it("annotates timeout waits when the health loop exhausts all attempts", async () => { + const service = makeGatewayService({ status: "running", pid: 8000 }); + inspectPortUsage.mockResolvedValue({ + port: 18789, + status: "free", + listeners: [], + hints: [], + }); + + const { waitForGatewayHealthyRestart } = await import("./restart-health.js"); + const snapshot = await waitForGatewayHealthyRestart({ + service, + port: 18789, + attempts: 4, + delayMs: 1_000, + }); + + expect(snapshot).toMatchObject({ + healthy: false, + runtime: { status: "running", pid: 8000 }, + portUsage: { status: "free" }, + waitOutcome: "timeout", + elapsedMs: 4_000, + }); + expect(sleep).toHaveBeenCalledTimes(4); + }); }); diff --git a/src/cli/daemon-cli/restart-health.ts b/src/cli/daemon-cli/restart-health.ts index 43102cedee8..6b94321d671 100644 --- a/src/cli/daemon-cli/restart-health.ts +++ b/src/cli/daemon-cli/restart-health.ts @@ -16,11 +16,15 @@ export const DEFAULT_RESTART_HEALTH_ATTEMPTS = Math.ceil( DEFAULT_RESTART_HEALTH_TIMEOUT_MS / DEFAULT_RESTART_HEALTH_DELAY_MS, ); +export type GatewayRestartWaitOutcome = "healthy" | "stale-pids" | "stopped-free" | "timeout"; + export type GatewayRestartSnapshot = { runtime: GatewayServiceRuntime; portUsage: PortUsage; healthy: boolean; staleGatewayPids: number[]; + waitOutcome?: GatewayRestartWaitOutcome; + elapsedMs?: number; }; export type GatewayPortHealthSnapshot = { @@ -201,6 +205,26 @@ export async function inspectGatewayRestart(params: { }; } +function shouldEarlyExitStoppedFree( + snapshot: GatewayRestartSnapshot, + attempt: number, + minAttempt: number, +): boolean { + return ( + attempt >= minAttempt && + snapshot.runtime.status === "stopped" && + snapshot.portUsage.status === "free" + ); +} + +function withWaitContext( + snapshot: GatewayRestartSnapshot, + waitOutcome: GatewayRestartWaitOutcome, + elapsedMs: number, +): GatewayRestartSnapshot { + return { ...snapshot, waitOutcome, elapsedMs }; +} + export async function waitForGatewayHealthyRestart(params: { service: GatewayService; port: number; @@ -219,12 +243,24 @@ export async function waitForGatewayHealthyRestart(params: { includeUnknownListenersAsStale: params.includeUnknownListenersAsStale, }); + let consecutiveStoppedFreeCount = 0; + const STOPPED_FREE_THRESHOLD = 6; + const minAttemptForEarlyExit = Math.min(Math.ceil(10_000 / delayMs), Math.floor(attempts / 2)); + for (let attempt = 0; attempt < attempts; attempt += 1) { if (snapshot.healthy) { - return snapshot; + return withWaitContext(snapshot, "healthy", attempt * delayMs); } if (snapshot.staleGatewayPids.length > 0 && snapshot.runtime.status !== "running") { - return snapshot; + return withWaitContext(snapshot, "stale-pids", attempt * delayMs); + } + if (shouldEarlyExitStoppedFree(snapshot, attempt, minAttemptForEarlyExit)) { + consecutiveStoppedFreeCount += 1; + if (consecutiveStoppedFreeCount >= STOPPED_FREE_THRESHOLD) { + return withWaitContext(snapshot, "stopped-free", attempt * delayMs); + } + } else if (snapshot.runtime.status !== "stopped" || snapshot.portUsage.status !== "free") { + consecutiveStoppedFreeCount = 0; } await sleep(delayMs); snapshot = await inspectGatewayRestart({ @@ -235,7 +271,7 @@ export async function waitForGatewayHealthyRestart(params: { }); } - return snapshot; + return withWaitContext(snapshot, "timeout", attempts * delayMs); } export async function waitForGatewayHealthyListener(params: { diff --git a/src/daemon/schtasks.install.test.ts b/src/daemon/schtasks.install.test.ts index 16311b21dfd..f4b79bbee41 100644 --- a/src/daemon/schtasks.install.test.ts +++ b/src/daemon/schtasks.install.test.ts @@ -6,16 +6,18 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; import { installScheduledTask, readScheduledTaskCommand } from "./schtasks.js"; const schtasksCalls: string[][] = []; +const schtasksResponses: { code: number; stdout: string; stderr: string }[] = []; vi.mock("./schtasks-exec.js", () => ({ execSchtasks: async (argv: string[]) => { schtasksCalls.push(argv); - return { code: 0, stdout: "", stderr: "" }; + return schtasksResponses.shift() ?? { code: 0, stdout: "", stderr: "" }; }, })); beforeEach(() => { schtasksCalls.length = 0; + schtasksResponses.length = 0; }); describe("installScheduledTask", () => { @@ -97,8 +99,9 @@ describe("installScheduledTask", () => { expect(parsed?.environment).not.toHaveProperty("OC_EMPTY"); expect(schtasksCalls[0]).toEqual(["/Query"]); - expect(schtasksCalls[1]?.[0]).toBe("/Create"); - expect(schtasksCalls[2]).toEqual(["/Run", "/TN", "OpenClaw Gateway"]); + expect(schtasksCalls[1]).toEqual(["/Query", "/TN", "OpenClaw Gateway"]); + expect(schtasksCalls[2]?.[0]).toBe("/Change"); + expect(schtasksCalls[3]).toEqual(["/Run", "/TN", "OpenClaw Gateway"]); }); }); @@ -134,6 +137,100 @@ describe("installScheduledTask", () => { }); }); + it("uses /Create when the task does not exist yet", async () => { + await withUserProfileDir(async (_tmpDir, env) => { + schtasksResponses.push( + { code: 0, stdout: "", stderr: "" }, + { code: 1, stdout: "", stderr: "ERROR: The system cannot find the file specified." }, + ); + + await installScheduledTask({ + env, + stdout: new PassThrough(), + programArguments: ["node", "gateway.js"], + environment: {}, + }); + + expect(schtasksCalls[0]).toEqual(["/Query"]); + expect(schtasksCalls[1]).toEqual(["/Query", "/TN", "OpenClaw Gateway"]); + expect(schtasksCalls[2]?.[0]).toBe("/Create"); + expect(schtasksCalls[3]).toEqual(["/Run", "/TN", "OpenClaw Gateway"]); + }); + }); + + it("falls back to /Create when /Change fails on an existing task", async () => { + await withUserProfileDir(async (_tmpDir, env) => { + schtasksResponses.push( + { code: 0, stdout: "", stderr: "" }, + { code: 0, stdout: "", stderr: "" }, + { code: 1, stdout: "", stderr: "ERROR: Access is denied." }, + ); + + await installScheduledTask({ + env, + stdout: new PassThrough(), + programArguments: ["node", "gateway.js"], + environment: {}, + }); + + expect(schtasksCalls[0]).toEqual(["/Query"]); + expect(schtasksCalls[1]).toEqual(["/Query", "/TN", "OpenClaw Gateway"]); + expect(schtasksCalls[2]?.[0]).toBe("/Change"); + expect(schtasksCalls[3]?.[0]).toBe("/Create"); + expect(schtasksCalls[4]).toEqual(["/Run", "/TN", "OpenClaw Gateway"]); + }); + }); + + it("throws when /Run fails after updating an existing task", async () => { + await withUserProfileDir(async (_tmpDir, env) => { + schtasksResponses.push( + { code: 0, stdout: "", stderr: "" }, + { code: 0, stdout: "", stderr: "" }, + { code: 0, stdout: "", stderr: "" }, + { code: 1, stdout: "", stderr: "ERROR: Access is denied." }, + ); + + await expect( + installScheduledTask({ + env, + stdout: new PassThrough(), + programArguments: ["node", "gateway.js"], + environment: {}, + }), + ).rejects.toThrow("schtasks run failed: ERROR: Access is denied."); + + expect(schtasksCalls[0]).toEqual(["/Query"]); + expect(schtasksCalls[1]).toEqual(["/Query", "/TN", "OpenClaw Gateway"]); + expect(schtasksCalls[2]?.[0]).toBe("/Change"); + expect(schtasksCalls[3]).toEqual(["/Run", "/TN", "OpenClaw Gateway"]); + }); + }); + + it("throws when /Run fails after creating a new task", async () => { + await withUserProfileDir(async (_tmpDir, env) => { + schtasksResponses.push( + { code: 0, stdout: "", stderr: "" }, + { code: 1, stdout: "", stderr: "ERROR: The system cannot find the file specified." }, + { code: 0, stdout: "", stderr: "" }, + { code: 1, stdout: "", stderr: "ERROR: Access is denied." }, + ); + + await expect( + installScheduledTask({ + env, + stdout: new PassThrough(), + programArguments: ["node", "gateway.js"], + environment: {}, + }), + ).rejects.toThrow("schtasks run failed: ERROR: Access is denied."); + + expect(schtasksCalls[0]).toEqual(["/Query"]); + expect(schtasksCalls[1]).toEqual(["/Query", "/TN", "OpenClaw Gateway"]); + expect(schtasksCalls[2]?.[0]).toBe("/Create"); + expect(schtasksCalls[3]).toEqual(["/Run", "/TN", "OpenClaw Gateway"]); + }); + }); + it("does not persist a frozen PATH snapshot into the generated task script", async () => { await withUserProfileDir(async (_tmpDir, env) => { const { scriptPath } = await installScheduledTask({ diff --git a/src/daemon/schtasks.startup-fallback.test.ts b/src/daemon/schtasks.startup-fallback.test.ts index e05ee499201..c8e84a3376a 100644 --- a/src/daemon/schtasks.startup-fallback.test.ts +++ b/src/daemon/schtasks.startup-fallback.test.ts @@ -123,6 +123,7 @@ describe("Windows startup fallback", () => { await withWindowsEnv("openclaw-win-startup-", async ({ env }) => { schtasksResponses.push( { code: 0, stdout: "", stderr: "" }, + { code: 1, stdout: "", stderr: "not found" }, { code: 5, stdout: "", stderr: "ERROR: Access is denied." }, ); @@ -158,6 +159,7 @@ describe("Windows startup fallback", () => { await withWindowsEnv("openclaw-win-startup-", async ({ env }) => { schtasksResponses.push( { code: 0, stdout: "", stderr: "" }, + { code: 1, stdout: "", stderr: "not found" }, { code: 124, stdout: "", stderr: "schtasks timed out after 15000ms" }, ); diff --git a/src/daemon/schtasks.stop.test.ts b/src/daemon/schtasks.stop.test.ts index fe05df20008..6cb963c74fd 100644 --- a/src/daemon/schtasks.stop.test.ts +++ b/src/daemon/schtasks.stop.test.ts @@ -186,4 +186,20 @@ describe("Scheduled Task stop/restart cleanup", () => { expect(schtasksCalls.at(-1)).toEqual(["/Run", "/TN", "OpenClaw Gateway"]); }); }); + + it("throws when /Run fails during restart", async () => { + await withPreparedGatewayTask(async ({ env, stdout }) => { + schtasksResponses.push( + { ...SUCCESS_RESPONSE }, + { ...SUCCESS_RESPONSE }, + { ...SUCCESS_RESPONSE }, + { code: 1, stdout: "", stderr: "ERROR: Access is denied." }, + ); + + await expect(restartScheduledTask({ env, stdout })).rejects.toThrow( + "schtasks run failed: ERROR: Access is denied.", + ); + expect(schtasksCalls.at(-1)).toEqual(["/Run", "/TN", "OpenClaw Gateway"]); + }); + }); }); diff --git a/src/daemon/schtasks.ts b/src/daemon/schtasks.ts index 2a99b89d11c..3ac7928fd35 100644 --- a/src/daemon/schtasks.ts +++ b/src/daemon/schtasks.ts @@ -581,6 +581,45 @@ export async function stageScheduledTask({ return { scriptPath }; } +async function updateExistingScheduledTask(params: { + env: GatewayServiceEnv; + stdout: NodeJS.WritableStream; + taskName: string; + quotedScript: string; + scriptPath: string; +}): Promise { + if (!(await isRegisteredScheduledTask(params.env))) { + return false; + } + const change = await execSchtasks([ + "/Change", + "/TN", + params.taskName, + "/TR", + params.quotedScript, + ]); + if (change.code !== 0) { + return false; + } + await runScheduledTaskOrThrow(params.taskName); + writeFormattedLines( + params.stdout, + [ + { label: "Updated Scheduled Task", value: params.taskName }, + { label: "Task script", value: params.scriptPath }, + ], + { leadingBlankLine: true }, + ); + return true; +} + +async function runScheduledTaskOrThrow(taskName: string): Promise { + const run = await execSchtasks(["/Run", "/TN", taskName]); + if (run.code !== 0) { + throw new Error(`schtasks run failed: ${run.stderr || run.stdout}`.trim()); + } +} + async function activateScheduledTask(params: { env: GatewayServiceEnv; stdout: NodeJS.WritableStream; @@ -591,6 +630,11 @@ async function activateScheduledTask(params: { const taskName = resolveTaskName(params.env); const quotedScript = quoteSchtasksArg(params.scriptPath); + + if (await updateExistingScheduledTask({ ...params, taskName, quotedScript })) { + return; + } + const baseArgs = [ "/Create", "/F", @@ -634,7 +678,7 @@ async function activateScheduledTask(params: { throw new Error(`schtasks create failed: ${detail}`.trim()); } - await execSchtasks(["/Run", "/TN", taskName]); + await runScheduledTaskOrThrow(taskName); // Ensure we don't end up writing to a clack spinner line (wizards show progress without a newline). writeFormattedLines( params.stdout, @@ -761,10 +805,7 @@ export async function restartScheduledTask({ } } } - const res = await execSchtasks(["/Run", "/TN", taskName]); - if (res.code !== 0) { - throw new Error(`schtasks run failed: ${res.stderr || res.stdout}`.trim()); - } + await runScheduledTaskOrThrow(taskName); stdout.write(`${formatLine("Restarted Scheduled Task", taskName)}\n`); return { outcome: "completed" }; }