fix(daemon): replace bootout with kickstart -k for launchd restarts on macOS

On macOS, launchctl bootout permanently unloads the LaunchAgent plist.
Even with KeepAlive: true, launchd cannot respawn a service whose plist
has been removed from its registry. This left users with a dead gateway
requiring manual 'openclaw gateway install' to recover.

Affected trigger paths:
- openclaw gateway restart from an agent session (#43311)
- SIGTERM on config reload (#43406)
- Gateway self-restart via SIGTERM (#43035)
- Hot reload on channel config change (#43049)

Switch restartLaunchAgent() to launchctl kickstart -k, which force-kills
and restarts the service without unloading the plist. When the restart
originates from inside the launchd-managed process tree, delegate to a
new detached handoff helper (launchd-restart-handoff.ts) to avoid the
caller being killed mid-command. Self-restart paths in process-respawn.ts
now schedule the detached start-after-exit handoff before exiting instead
of relying on exit/KeepAlive timing.

Fixes #43311, #43406, #43035, #43049
This commit is contained in:
Robin Waslander
2026-03-12 00:11:21 +01:00
parent e11be576fb
commit 3c0fd3dffe
6 changed files with 303 additions and 94 deletions

View File

@@ -81,6 +81,7 @@ Docs: https://docs.openclaw.ai
- Telegram/outbound HTML sends: chunk long HTML-mode messages, preserve plain-text fallback and silent-delivery params across retries, and cut over to plain text when HTML chunk planning cannot safely preserve the full message. (#42240) thanks @obviyus.
- Agents/embedded overload logs: include the failing model and provider in error-path console output, with lifecycle regression coverage for the rendered and sanitized `consoleMessage`. (#41236) thanks @jiarung.
- Agents/failover: treat Gemini `MALFORMED_RESPONSE` stop reasons as retryable timeouts so preview-model enum drift falls back cleanly instead of crashing the run, without also reclassifying malformed function-call errors. (#42292) Thanks @jnMetaCode.
- Gateway/macOS launchd restarts: keep the LaunchAgent registered during explicit restarts, hand off self-restarts through a detached launchd helper, and recover config/hot reload restart paths without unloading the service. Fixes #43311, #43406, #43035, and #43049.
- Discord/Telegram outbound runtime config: thread runtime-resolved config through Discord and Telegram send paths so SecretRef-based credentials stay resolved during message delivery. (#42352) Thanks @joshavant.
- Secrets/SecretRef: reject exec SecretRef traversal ids across schema, runtime, and gateway. (#42370) Thanks @joshavant.
- Telegram/docs: clarify that `channels.telegram.groups` allowlists chats while `groupAllowFrom` allowlists users inside those chats, and point invalid negative chat IDs at the right config key. (#42451) Thanks @altaywtf.

View File

@@ -0,0 +1,142 @@
import { spawn } from "node:child_process";
import os from "node:os";
import path from "node:path";
import { resolveGatewayLaunchAgentLabel } from "./constants.js";
/**
 * Selects which shell script the detached restart helper runs.
 *
 * - `"kickstart"`: sleep one second, then `launchctl kickstart -k` the service.
 * - `"start-after-exit"`: poll until the given PID is gone (or fall back to a
 *   one-second sleep), then `launchctl start` the service.
 */
export type LaunchdRestartHandoffMode = "kickstart" | "start-after-exit";
/** Outcome of scheduling the detached launchd restart helper. */
export type LaunchdRestartHandoffResult = {
// True when the helper shell was spawned without `spawn()` throwing.
ok: boolean;
// PID of the spawned helper shell, when the runtime reported one.
pid?: number;
// Error message describing why scheduling failed (set when `ok` is false).
detail?: string;
};
/** Identifiers needed to address the gateway LaunchAgent through `launchctl`. */
export type LaunchdRestartTarget = {
// launchd GUI domain of the current user, formatted as `gui/<uid>`.
domain: string;
// LaunchAgent label (env override or the profile-derived default).
label: string;
// Path to `<home>/Library/LaunchAgents/<label>.plist`.
plistPath: string;
// `<domain>/<label>`, the form passed to `launchctl kickstart` / `enable`.
serviceTarget: string;
};
/**
 * Returns the launchd GUI domain for the current user (`gui/<uid>`).
 *
 * When the runtime does not expose `process.getuid`, the uid defaults to 501.
 */
function resolveGuiDomain(): string {
  const uid = typeof process.getuid === "function" ? process.getuid() : 501;
  return `gui/${uid}`;
}
/**
 * Picks the LaunchAgent label to operate on.
 *
 * A non-blank `OPENCLAW_LAUNCHD_LABEL` (after trimming) takes precedence;
 * otherwise the label is derived from `OPENCLAW_PROFILE` via
 * `resolveGatewayLaunchAgentLabel`.
 *
 * @param env Environment-style map to read the overrides from.
 * @returns The resolved LaunchAgent label.
 */
function resolveLaunchAgentLabel(env?: Record<string, string | undefined>): string {
  const override = env?.OPENCLAW_LAUNCHD_LABEL?.trim();
  return override ? override : resolveGatewayLaunchAgentLabel(env?.OPENCLAW_PROFILE);
}
/**
 * Resolves everything needed to address the gateway LaunchAgent via launchctl.
 *
 * The home directory comes from a non-blank `HOME` entry in `env`, falling back
 * to `os.homedir()`. The plist is expected under `Library/LaunchAgents` there.
 *
 * @param env Environment-style map; defaults to `process.env`.
 * @returns Domain, label, plist path and `<domain>/<label>` service target.
 */
export function resolveLaunchdRestartTarget(
  env: Record<string, string | undefined> = process.env,
): LaunchdRestartTarget {
  const domain = resolveGuiDomain();
  const label = resolveLaunchAgentLabel(env);
  const homeDir = env.HOME?.trim() || os.homedir();
  const target: LaunchdRestartTarget = {
    domain,
    label,
    plistPath: path.join(homeDir, "Library", "LaunchAgents", `${label}.plist`),
    serviceTarget: `${domain}/${label}`,
  };
  return target;
}
/**
 * Reports whether the current process appears to be running as the launchd
 * service identified by `label`.
 *
 * The first non-blank value among `LAUNCH_JOB_LABEL`, `LAUNCH_JOB_NAME` and
 * `XPC_SERVICE_NAME` (in that order, trimmed) decides the answer on its own.
 * Only when none of them is set is `OPENCLAW_LAUNCHD_LABEL` consulted.
 *
 * @param label LaunchAgent label to compare against.
 * @param env Environment to inspect; defaults to `process.env`.
 * @returns True when the detected label equals `label`.
 */
export function isCurrentProcessLaunchdServiceLabel(
  label: string,
  env: NodeJS.ProcessEnv = process.env,
): boolean {
  const launchdProvided = [env.LAUNCH_JOB_LABEL, env.LAUNCH_JOB_NAME, env.XPC_SERVICE_NAME];
  for (const raw of launchdProvided) {
    const candidate = raw?.trim();
    if (candidate) {
      // A launchd-provided label is authoritative, even when it does not match.
      return candidate === label;
    }
  }
  const configured = env.OPENCLAW_LAUNCHD_LABEL?.trim();
  if (!configured) {
    return false;
  }
  return configured === label;
}
/**
 * Builds the `/bin/sh` script executed by the detached restart helper.
 *
 * Positional arguments the script expects:
 * - `$1` service target (`<domain>/<label>`)
 * - `$2` launchd domain (`gui/<uid>`)
 * - `$3` LaunchAgent plist path
 * - `$4` PID to wait for (only read in `"start-after-exit"` mode)
 *
 * `"kickstart"` sleeps one second and then runs `launchctl kickstart -k`; if
 * that fails it re-enables and bootstraps the plist and retries once.
 *
 * `"start-after-exit"` polls the PID (up to 100 × 0.1s) until it is gone, or
 * sleeps one second when no usable PID was given, then starts the service. If
 * the start fails it re-enables/bootstraps and falls back to `kickstart -k`.
 *
 * `launchctl start` is a legacy subcommand documented to take a bare job label,
 * whereas `kickstart`/`enable` take a service target. The label is therefore
 * derived inside the script from the service target (everything after the last
 * `/`). This assumes the label itself contains no `/`.
 *
 * @param mode Which restart strategy the script should implement.
 * @returns Shell script source terminated by a newline.
 */
function buildLaunchdRestartScript(mode: LaunchdRestartHandoffMode): string {
  const positionalArgs = ['service_target="$1"', 'domain="$2"', 'plist_path="$3"'];

  if (mode === "kickstart") {
    return [
      ...positionalArgs,
      // Give the caller a moment to finish before launchd kills the service tree.
      "sleep 1",
      'if ! launchctl kickstart -k "$service_target" >/dev/null 2>&1; then',
      'launchctl enable "$service_target" >/dev/null 2>&1',
      'if launchctl bootstrap "$domain" "$plist_path" >/dev/null 2>&1; then',
      'launchctl kickstart -k "$service_target" >/dev/null 2>&1 || true',
      "fi",
      "fi",
      "",
    ].join("\n");
  }

  return [
    ...positionalArgs,
    'wait_pid="$4"',
    // Legacy `launchctl start` wants the bare label, not `<domain>/<label>`.
    'label="${service_target##*/}"',
    'if [ -n "$wait_pid" ] && [ "$wait_pid" -gt 1 ] 2>/dev/null; then',
    "attempts=0",
    'while kill -0 "$wait_pid" >/dev/null 2>&1; do',
    "attempts=$((attempts + 1))",
    'if [ "$attempts" -ge 100 ]; then',
    "break",
    "fi",
    "sleep 0.1",
    "done",
    "else",
    "sleep 1",
    "fi",
    'if ! launchctl start "$label" >/dev/null 2>&1; then',
    'launchctl enable "$service_target" >/dev/null 2>&1',
    'if launchctl bootstrap "$domain" "$plist_path" >/dev/null 2>&1; then',
    'launchctl start "$label" >/dev/null 2>&1 || launchctl kickstart -k "$service_target" >/dev/null 2>&1 || true',
    "else",
    'launchctl kickstart -k "$service_target" >/dev/null 2>&1 || true',
    "fi",
    "fi",
    "",
  ].join("\n");
}
/**
 * Spawns a detached `/bin/sh` helper that restarts the gateway LaunchAgent
 * after the caller has gone away.
 *
 * The helper receives the service target, domain, plist path and the PID to
 * wait for as positional arguments and runs the script for `params.mode`. It
 * is detached with stdio ignored and unref'd, so it neither keeps the caller
 * alive nor dies with it.
 *
 * @param params.env Environment used to resolve the LaunchAgent target; it is
 *   also layered over `process.env` for the helper process.
 * @param params.mode Restart strategy the helper should run.
 * @param params.waitForPid PID the helper should wait on before starting the
 *   service. Non-finite or missing values are passed as `0`, which the script
 *   treats as "no PID".
 * @returns `ok: true` with the helper PID when it was spawned, otherwise
 *   `ok: false` with a `detail` message. Never throws.
 */
export function scheduleDetachedLaunchdRestartHandoff(params: {
  env?: Record<string, string | undefined>;
  mode: LaunchdRestartHandoffMode;
  waitForPid?: number;
}): LaunchdRestartHandoffResult {
  const target = resolveLaunchdRestartTarget(params.env);
  const waitForPid =
    typeof params.waitForPid === "number" && Number.isFinite(params.waitForPid)
      ? Math.floor(params.waitForPid)
      : 0;
  try {
    const child = spawn(
      "/bin/sh",
      [
        "-c",
        buildLaunchdRestartScript(params.mode),
        // $0 for the script; shows up as the helper's process name.
        "openclaw-launchd-restart-handoff",
        target.serviceTarget,
        target.domain,
        target.plistPath,
        String(waitForPid),
      ],
      {
        detached: true,
        stdio: "ignore",
        env: { ...process.env, ...params.env },
      },
    );
    // Most spawn failures (EAGAIN, ENOMEM, ...) are delivered asynchronously as
    // an `error` event rather than thrown. Without a listener that event would
    // crash the caller; the failure is reported through the return value below.
    child.on("error", () => {});
    if (child.pid === undefined) {
      // Node leaves `pid` undefined when the process could not be spawned.
      return { ok: false, detail: "failed to spawn launchd restart helper" };
    }
    child.unref();
    return { ok: true, pid: child.pid };
  } catch (err) {
    return {
      ok: false,
      detail: err instanceof Error ? err.message : String(err),
    };
  }
}

View File

@@ -18,11 +18,17 @@ const state = vi.hoisted(() => ({
listOutput: "",
printOutput: "",
bootstrapError: "",
kickstartError: "",
kickstartFailuresRemaining: 0,
dirs: new Set<string>(),
dirModes: new Map<string, number>(),
files: new Map<string, string>(),
fileModes: new Map<string, number>(),
}));
// Hoisted spies that back the mocked "./launchd-restart-handoff.js" module.
// Defaults: the process is NOT the managed LaunchAgent, and scheduling the
// detached handoff succeeds with a fixed fake pid.
const launchdRestartHandoffState = vi.hoisted(() => ({
isCurrentProcessLaunchdServiceLabel: vi.fn<(label: string) => boolean>(() => false),
scheduleDetachedLaunchdRestartHandoff: vi.fn((_params: unknown) => ({ ok: true, pid: 7331 })),
}));
const defaultProgramArguments = ["node", "-e", "process.exit(0)"];
function normalizeLaunchctlArgs(file: string, args: string[]): string[] {
@@ -49,10 +55,21 @@ vi.mock("./exec-file.js", () => ({
if (call[0] === "bootstrap" && state.bootstrapError) {
return { stdout: "", stderr: state.bootstrapError, code: 1 };
}
if (call[0] === "kickstart" && state.kickstartError && state.kickstartFailuresRemaining > 0) {
state.kickstartFailuresRemaining -= 1;
return { stdout: "", stderr: state.kickstartError, code: 1 };
}
return { stdout: "", stderr: "", code: 0 };
}),
}));
// Replace the handoff module with thin wrappers that forward to the hoisted
// spies, so individual tests can change return values and inspect calls.
vi.mock("./launchd-restart-handoff.js", () => ({
isCurrentProcessLaunchdServiceLabel: (label: string) =>
launchdRestartHandoffState.isCurrentProcessLaunchdServiceLabel(label),
scheduleDetachedLaunchdRestartHandoff: (params: unknown) =>
launchdRestartHandoffState.scheduleDetachedLaunchdRestartHandoff(params),
}));
vi.mock("node:fs/promises", async (importOriginal) => {
const actual = await importOriginal<typeof import("node:fs/promises")>();
const wrapped = {
@@ -109,10 +126,19 @@ beforeEach(() => {
state.listOutput = "";
state.printOutput = "";
state.bootstrapError = "";
state.kickstartError = "";
state.kickstartFailuresRemaining = 0;
state.dirs.clear();
state.dirModes.clear();
state.files.clear();
state.fileModes.clear();
launchdRestartHandoffState.isCurrentProcessLaunchdServiceLabel.mockReset();
launchdRestartHandoffState.isCurrentProcessLaunchdServiceLabel.mockReturnValue(false);
launchdRestartHandoffState.scheduleDetachedLaunchdRestartHandoff.mockReset();
launchdRestartHandoffState.scheduleDetachedLaunchdRestartHandoff.mockReturnValue({
ok: true,
pid: 7331,
});
vi.clearAllMocks();
});
@@ -304,19 +330,37 @@ describe("launchd install", () => {
expect(state.fileModes.get(plistPath)).toBe(0o644);
});
it("restarts LaunchAgent with bootout-enable-bootstrap-kickstart order", async () => {
it("restarts LaunchAgent with kickstart and no bootout", async () => {
const env = createDefaultLaunchdEnv();
await restartLaunchAgent({
env,
stdout: new PassThrough(),
});
const domain = typeof process.getuid === "function" ? `gui/${process.getuid()}` : "gui/501";
const label = "ai.openclaw.gateway";
const serviceId = `${domain}/${label}`;
expect(state.launchctlCalls).toContainEqual(["kickstart", "-k", serviceId]);
expect(state.launchctlCalls.some((call) => call[0] === "bootout")).toBe(false);
expect(state.launchctlCalls.some((call) => call[0] === "bootstrap")).toBe(false);
});
it("falls back to bootstrap when kickstart cannot find the service", async () => {
const env = createDefaultLaunchdEnv();
state.kickstartError = "Could not find service";
state.kickstartFailuresRemaining = 1;
await restartLaunchAgent({
env,
stdout: new PassThrough(),
});
const domain = typeof process.getuid === "function" ? `gui/${process.getuid()}` : "gui/501";
const label = "ai.openclaw.gateway";
const plistPath = resolveLaunchAgentPlistPath(env);
const serviceId = `${domain}/${label}`;
const bootoutIndex = state.launchctlCalls.findIndex(
(c) => c[0] === "bootout" && c[1] === serviceId,
const kickstartCalls = state.launchctlCalls.filter(
(c) => c[0] === "kickstart" && c[1] === "-k" && c[2] === serviceId,
);
const enableIndex = state.launchctlCalls.findIndex(
(c) => c[0] === "enable" && c[1] === serviceId,
@@ -324,53 +368,27 @@ describe("launchd install", () => {
const bootstrapIndex = state.launchctlCalls.findIndex(
(c) => c[0] === "bootstrap" && c[1] === domain && c[2] === plistPath,
);
const kickstartIndex = state.launchctlCalls.findIndex(
(c) => c[0] === "kickstart" && c[1] === "-k" && c[2] === serviceId,
);
expect(bootoutIndex).toBeGreaterThanOrEqual(0);
expect(kickstartCalls).toHaveLength(2);
expect(enableIndex).toBeGreaterThanOrEqual(0);
expect(bootstrapIndex).toBeGreaterThanOrEqual(0);
expect(kickstartIndex).toBeGreaterThanOrEqual(0);
expect(bootoutIndex).toBeLessThan(enableIndex);
expect(enableIndex).toBeLessThan(bootstrapIndex);
expect(bootstrapIndex).toBeLessThan(kickstartIndex);
expect(state.launchctlCalls.some((call) => call[0] === "bootout")).toBe(false);
});
it("waits for previous launchd pid to exit before bootstrapping", async () => {
it("hands restart off to a detached helper when invoked from the current LaunchAgent", async () => {
const env = createDefaultLaunchdEnv();
state.printOutput = ["state = running", "pid = 4242"].join("\n");
const killSpy = vi.spyOn(process, "kill");
killSpy
.mockImplementationOnce(() => true)
.mockImplementationOnce(() => {
const err = new Error("no such process") as NodeJS.ErrnoException;
err.code = "ESRCH";
throw err;
});
launchdRestartHandoffState.isCurrentProcessLaunchdServiceLabel.mockReturnValue(true);
vi.useFakeTimers();
try {
const restartPromise = restartLaunchAgent({
env,
stdout: new PassThrough(),
});
await vi.advanceTimersByTimeAsync(250);
await restartPromise;
expect(killSpy).toHaveBeenCalledWith(4242, 0);
const domain = typeof process.getuid === "function" ? `gui/${process.getuid()}` : "gui/501";
const label = "ai.openclaw.gateway";
const bootoutIndex = state.launchctlCalls.findIndex(
(c) => c[0] === "bootout" && c[1] === `${domain}/${label}`,
);
const bootstrapIndex = state.launchctlCalls.findIndex((c) => c[0] === "bootstrap");
expect(bootoutIndex).toBeGreaterThanOrEqual(0);
expect(bootstrapIndex).toBeGreaterThanOrEqual(0);
expect(bootoutIndex).toBeLessThan(bootstrapIndex);
} finally {
vi.useRealTimers();
killSpy.mockRestore();
}
await restartLaunchAgent({
env,
stdout: new PassThrough(),
});
expect(launchdRestartHandoffState.scheduleDetachedLaunchdRestartHandoff).toHaveBeenCalledWith({
env,
mode: "kickstart",
});
expect(state.launchctlCalls).toEqual([]);
});
it("shows actionable guidance when launchctl gui domain does not support bootstrap", async () => {

View File

@@ -12,6 +12,10 @@ import {
buildLaunchAgentPlist as buildLaunchAgentPlistImpl,
readLaunchAgentProgramArgumentsFromFile,
} from "./launchd-plist.js";
import {
isCurrentProcessLaunchdServiceLabel,
scheduleDetachedLaunchdRestartHandoff,
} from "./launchd-restart-handoff.js";
import { formatLine, toPosixPath, writeFormattedLines } from "./output.js";
import { resolveGatewayStateDir, resolveHomeDir } from "./paths.js";
import { parseKeyValueOutput } from "./runtime-parse.js";
@@ -352,34 +356,6 @@ function isUnsupportedGuiDomain(detail: string): boolean {
);
}
const RESTART_PID_WAIT_TIMEOUT_MS = 10_000;
const RESTART_PID_WAIT_INTERVAL_MS = 200;
async function sleepMs(ms: number): Promise<void> {
await new Promise((resolve) => {
setTimeout(resolve, ms);
});
}
async function waitForPidExit(pid: number): Promise<void> {
if (!Number.isFinite(pid) || pid <= 1) {
return;
}
const deadline = Date.now() + RESTART_PID_WAIT_TIMEOUT_MS;
while (Date.now() < deadline) {
try {
process.kill(pid, 0);
} catch (err) {
const code = (err as NodeJS.ErrnoException).code;
if (code === "ESRCH" || code === "EPERM") {
return;
}
return;
}
await sleepMs(RESTART_PID_WAIT_INTERVAL_MS);
}
}
export async function stopLaunchAgent({ stdout, env }: GatewayServiceControlArgs): Promise<void> {
const domain = resolveGuiDomain();
const label = resolveLaunchAgentLabel({ env });
@@ -476,24 +452,43 @@ export async function restartLaunchAgent({
const domain = resolveGuiDomain();
const label = resolveLaunchAgentLabel({ env: serviceEnv });
const plistPath = resolveLaunchAgentPlistPath(serviceEnv);
const serviceTarget = `${domain}/${label}`;
const runtime = await execLaunchctl(["print", `${domain}/${label}`]);
const previousPid =
runtime.code === 0
? parseLaunchctlPrint(runtime.stdout || runtime.stderr || "").pid
: undefined;
const stop = await execLaunchctl(["bootout", `${domain}/${label}`]);
if (stop.code !== 0 && !isLaunchctlNotLoaded(stop)) {
throw new Error(`launchctl bootout failed: ${stop.stderr || stop.stdout}`.trim());
}
if (typeof previousPid === "number") {
await waitForPidExit(previousPid);
// Restart requests issued from inside the managed gateway process tree need a
// detached handoff. A direct `kickstart -k` would terminate the caller before
// it can finish the restart command.
if (isCurrentProcessLaunchdServiceLabel(label)) {
const handoff = scheduleDetachedLaunchdRestartHandoff({
env: serviceEnv,
mode: "kickstart",
});
if (!handoff.ok) {
throw new Error(`launchd restart handoff failed: ${handoff.detail ?? "unknown error"}`);
}
try {
stdout.write(`${formatLine("Scheduled LaunchAgent restart", serviceTarget)}\n`);
} catch (err: unknown) {
if ((err as NodeJS.ErrnoException)?.code !== "EPIPE") {
throw err;
}
}
return;
}
// launchd can persist "disabled" state after bootout; clear it before bootstrap
// (matches the same guard in installLaunchAgent).
await execLaunchctl(["enable", `${domain}/${label}`]);
const start = await execLaunchctl(["kickstart", "-k", serviceTarget]);
if (start.code === 0) {
try {
stdout.write(`${formatLine("Restarted LaunchAgent", serviceTarget)}\n`);
} catch (err: unknown) {
if ((err as NodeJS.ErrnoException)?.code !== "EPIPE") {
throw err;
}
}
return;
}
// If the service was previously booted out, re-register the plist and retry.
await execLaunchctl(["enable", serviceTarget]);
const boot = await execLaunchctl(["bootstrap", domain, plistPath]);
if (boot.code !== 0) {
const detail = (boot.stderr || boot.stdout).trim();
@@ -511,12 +506,12 @@ export async function restartLaunchAgent({
throw new Error(`launchctl bootstrap failed: ${detail}`);
}
const start = await execLaunchctl(["kickstart", "-k", `${domain}/${label}`]);
if (start.code !== 0) {
throw new Error(`launchctl kickstart failed: ${start.stderr || start.stdout}`.trim());
const retry = await execLaunchctl(["kickstart", "-k", serviceTarget]);
if (retry.code !== 0) {
throw new Error(`launchctl kickstart failed: ${retry.stderr || retry.stdout}`.trim());
}
try {
stdout.write(`${formatLine("Restarted LaunchAgent", `${domain}/${label}`)}\n`);
stdout.write(`${formatLine("Restarted LaunchAgent", serviceTarget)}\n`);
} catch (err: unknown) {
if ((err as NodeJS.ErrnoException)?.code !== "EPIPE") {
throw err;

View File

@@ -4,6 +4,7 @@ import { SUPERVISOR_HINT_ENV_VARS } from "./supervisor-markers.js";
const spawnMock = vi.hoisted(() => vi.fn());
const triggerOpenClawRestartMock = vi.hoisted(() => vi.fn());
const scheduleDetachedLaunchdRestartHandoffMock = vi.hoisted(() => vi.fn());
vi.mock("node:child_process", () => ({
spawn: (...args: unknown[]) => spawnMock(...args),
@@ -11,6 +12,10 @@ vi.mock("node:child_process", () => ({
vi.mock("./restart.js", () => ({
triggerOpenClawRestart: (...args: unknown[]) => triggerOpenClawRestartMock(...args),
}));
vi.mock("../daemon/launchd-restart-handoff.js", () => ({
scheduleDetachedLaunchdRestartHandoff: (...args: unknown[]) =>
scheduleDetachedLaunchdRestartHandoffMock(...args),
}));
import { restartGatewayProcessWithFreshPid } from "./process-respawn.js";
@@ -35,6 +40,8 @@ afterEach(() => {
process.execArgv = [...originalExecArgv];
spawnMock.mockClear();
triggerOpenClawRestartMock.mockClear();
scheduleDetachedLaunchdRestartHandoffMock.mockReset();
scheduleDetachedLaunchdRestartHandoffMock.mockReturnValue({ ok: true, pid: 8123 });
if (originalPlatformDescriptor) {
Object.defineProperty(process, "platform", originalPlatformDescriptor);
}
@@ -54,6 +61,11 @@ function expectLaunchdSupervisedWithoutKickstart(params?: { launchJobLabel?: str
process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway";
const result = restartGatewayProcessWithFreshPid();
expect(result.mode).toBe("supervised");
expect(scheduleDetachedLaunchdRestartHandoffMock).toHaveBeenCalledWith({
env: process.env,
mode: "start-after-exit",
waitForPid: process.pid,
});
expect(triggerOpenClawRestartMock).not.toHaveBeenCalled();
expect(spawnMock).not.toHaveBeenCalled();
}
@@ -72,6 +84,12 @@ describe("restartGatewayProcessWithFreshPid", () => {
process.env.LAUNCH_JOB_LABEL = "ai.openclaw.gateway";
const result = restartGatewayProcessWithFreshPid();
expect(result.mode).toBe("supervised");
expect(result.detail).toContain("launchd restart handoff");
expect(scheduleDetachedLaunchdRestartHandoffMock).toHaveBeenCalledWith({
env: process.env,
mode: "start-after-exit",
waitForPid: process.pid,
});
expect(triggerOpenClawRestartMock).not.toHaveBeenCalled();
expect(spawnMock).not.toHaveBeenCalled();
});
@@ -96,6 +114,25 @@ describe("restartGatewayProcessWithFreshPid", () => {
expect(triggerOpenClawRestartMock).not.toHaveBeenCalled();
});
// When the detached handoff cannot be scheduled, the restart must still report
// "supervised" mode and carry the failure detail in the fallback message.
it("falls back to plain supervised exit when launchd handoff scheduling fails", () => {
clearSupervisorHints();
setPlatform("darwin");
// Presumably the marker detectRespawnSupervisor uses to recognise launchd.
process.env.XPC_SERVICE_NAME = "ai.openclaw.gateway";
// Simulate the helper failing to spawn.
scheduleDetachedLaunchdRestartHandoffMock.mockReturnValue({
ok: false,
detail: "spawn failed",
});
const result = restartGatewayProcessWithFreshPid();
expect(result).toEqual({
mode: "supervised",
detail: "launchd exit fallback (spawn failed)",
});
// Neither of the other restart mechanisms may be used on the launchd path.
expect(triggerOpenClawRestartMock).not.toHaveBeenCalled();
expect(spawnMock).not.toHaveBeenCalled();
});
it("does not schedule kickstart on non-darwin platforms", () => {
setPlatform("linux");
process.env.INVOCATION_ID = "abc123";

View File

@@ -1,4 +1,5 @@
import { spawn } from "node:child_process";
import { scheduleDetachedLaunchdRestartHandoff } from "../daemon/launchd-restart-handoff.js";
import { triggerOpenClawRestart } from "./restart.js";
import { detectRespawnSupervisor } from "./supervisor-markers.js";
@@ -30,10 +31,25 @@ export function restartGatewayProcessWithFreshPid(): GatewayRespawnResult {
}
const supervisor = detectRespawnSupervisor(process.env);
if (supervisor) {
// launchd: exit(0) is sufficient — KeepAlive=true restarts the service.
// Self-issued `kickstart -k` races with launchd's bootout state machine
// and can leave the LaunchAgent permanently unloaded.
// See: https://github.com/openclaw/openclaw/issues/39760
// Hand off launchd restarts to a detached helper before exiting so config
// reloads and SIGUSR1-driven restarts do not depend on exit/respawn timing.
if (supervisor === "launchd") {
const handoff = scheduleDetachedLaunchdRestartHandoff({
env: process.env,
mode: "start-after-exit",
waitForPid: process.pid,
});
if (!handoff.ok) {
return {
mode: "supervised",
detail: `launchd exit fallback (${handoff.detail ?? "restart handoff failed"})`,
};
}
return {
mode: "supervised",
detail: `launchd restart handoff pid ${handoff.pid ?? "unknown"}`,
};
}
if (supervisor === "schtasks") {
const restart = triggerOpenClawRestart();
if (!restart.ok) {