fix(darwin): remove self-kickstart from launchd gateway restart; rely on KeepAlive

When the gateway needs a config-triggered restart under launchd, calling
`launchctl kickstart -k` from within the service itself races with
launchd's async bootout state machine:

1. `kickstart -k` initiates a launchd bootout → SIGTERM to self
2. Gateway ignores SIGTERM during shutdown → process doesn't exit
3. 2s `spawnSync` timeout kills the launchctl child, but launchd
   continues the bootout asynchronously
4. Fallback `launchctl bootstrap` fails with EIO (service mid-bootout)
5. In-process restart runs on the same PID that launchd will SIGKILL
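6. launchd finishes the bootout and SIGKILLs that PID; because the
   re-bootstrap in step 4 failed, the LaunchAgent stays permanently
   unloaded — no auto-restart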

Fix: on darwin/launchd, skip `triggerOpenClawRestart()` entirely.
The caller already calls `exitProcess(0)` for supervised mode, and
`KeepAlive=true` (always set in the plist template) restarts the
service within ~1 second.

The schtasks (Windows) path is unchanged — Windows doesn't have an
equivalent KeepAlive mechanism.
This commit is contained in:
daymade
2026-03-08 19:18:38 +08:00
committed by Peter Steinberger
parent 53fb317e7f
commit 5f45e76d61
2 changed files with 17 additions and 26 deletions

View File

@@ -46,16 +46,17 @@ function clearSupervisorHints() {
}
}
function expectLaunchdKickstartSupervised(params?: { launchJobLabel?: string }) {
function expectLaunchdSupervisedWithoutKickstart(params?: { launchJobLabel?: string }) {
setPlatform("darwin");
if (params?.launchJobLabel) {
process.env.LAUNCH_JOB_LABEL = params.launchJobLabel;
}
process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway";
triggerOpenClawRestartMock.mockReturnValue({ ok: true, method: "launchctl" });
const result = restartGatewayProcessWithFreshPid();
expect(result.mode).toBe("supervised");
expect(triggerOpenClawRestartMock).toHaveBeenCalledOnce();
// launchd path no longer calls triggerOpenClawRestart — it relies on
// KeepAlive=true to restart the service after the caller exits.
expect(triggerOpenClawRestartMock).not.toHaveBeenCalled();
expect(spawnMock).not.toHaveBeenCalled();
}
@@ -67,35 +68,19 @@ describe("restartGatewayProcessWithFreshPid", () => {
expect(spawnMock).not.toHaveBeenCalled();
});
it("returns supervised when launchd hints are present on macOS", () => {
it("returns supervised when launchd hints are present on macOS (no kickstart)", () => {
clearSupervisorHints();
setPlatform("darwin");
process.env.LAUNCH_JOB_LABEL = "ai.openclaw.gateway";
triggerOpenClawRestartMock.mockReturnValue({ ok: true, method: "launchctl" });
const result = restartGatewayProcessWithFreshPid();
expect(result.mode).toBe("supervised");
expect(triggerOpenClawRestartMock).toHaveBeenCalledOnce();
// launchd relies on KeepAlive=true — no kickstart call needed.
expect(triggerOpenClawRestartMock).not.toHaveBeenCalled();
expect(spawnMock).not.toHaveBeenCalled();
});
it("runs launchd kickstart helper on macOS when launchd label is set", () => {
expectLaunchdKickstartSupervised({ launchJobLabel: "ai.openclaw.gateway" });
});
it("returns failed when launchd kickstart helper fails", () => {
setPlatform("darwin");
process.env.LAUNCH_JOB_LABEL = "ai.openclaw.gateway";
process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway";
triggerOpenClawRestartMock.mockReturnValue({
ok: false,
method: "launchctl",
detail: "spawn failed",
});
const result = restartGatewayProcessWithFreshPid();
expect(result.mode).toBe("failed");
expect(result.detail).toContain("spawn failed");
it("returns supervised on macOS when launchd label is set (no kickstart)", () => {
expectLaunchdSupervisedWithoutKickstart({ launchJobLabel: "ai.openclaw.gateway" });
});
it("does not schedule kickstart on non-darwin platforms", () => {
@@ -133,7 +118,7 @@ describe("restartGatewayProcessWithFreshPid", () => {
it("returns supervised when OPENCLAW_LAUNCHD_LABEL is set (stock launchd plist)", () => {
clearSupervisorHints();
expectLaunchdKickstartSupervised();
expectLaunchdSupervisedWithoutKickstart();
});
it("returns supervised when OPENCLAW_SYSTEMD_UNIT is set", () => {

View File

@@ -30,7 +30,13 @@ export function restartGatewayProcessWithFreshPid(): GatewayRespawnResult {
}
const supervisor = detectRespawnSupervisor(process.env);
if (supervisor) {
if (supervisor === "launchd" || supervisor === "schtasks") {
// launchd: exit(0) is sufficient — KeepAlive=true restarts the service.
// Calling `kickstart -k` from within the service itself races with
// launchd's async bootout state machine: the spawnSync timeout kills the
// launchctl child, but launchd continues the bootout and eventually
// SIGKILLs this process, leaving the LaunchAgent permanently unloaded.
// See: https://github.com/openclaw/openclaw/issues/39760
if (supervisor === "schtasks") {
const restart = triggerOpenClawRestart();
if (!restart.ok) {
return {