fix: recover unloaded macOS launch agents (#43766)

This commit is contained in:
Peter Steinberger
2026-04-05 09:01:05 +01:00
parent 07e7b7177f
commit fd968bfb2d
8 changed files with 276 additions and 15 deletions

View File

@@ -27,6 +27,7 @@ const service = {
restart: vi.fn(),
};
// Standalone mock functions shared by the specs in this file.
// runServiceStart / runServiceRestart / runServiceStop are handed to the
// "./lifecycle-core.js" mock factory further down, so tests can script them.
const runServiceStart = vi.fn();
const runServiceRestart = vi.fn();
const runServiceStop = vi.fn();
const waitForGatewayHealthyListener = vi.fn();
@@ -50,6 +51,8 @@ const probeGateway = vi.fn<
>();
// Mock for the restart-enabled check; its default implementation returns true.
const isRestartEnabled = vi.fn<(config?: { commands?: unknown }) => boolean>(() => true);
// The mocks below are created through vi.hoisted so they already exist when
// vitest's hoisted vi.mock factories reference them.
// loadConfig's default implementation returns an empty config object.
const loadConfig = vi.hoisted(() => vi.fn(() => ({})));
// Stand-ins for the launchd helpers exposed by the "../../daemon/launchd.js" mock.
const launchAgentPlistExists = vi.hoisted(() => vi.fn());
const repairLaunchAgentBootstrap = vi.hoisted(() => vi.fn());
vi.mock("../../config/config.js", () => ({
loadConfig: () => loadConfig(),
@@ -81,6 +84,12 @@ vi.mock("../../daemon/service.js", () => ({
resolveGatewayService: () => service,
}));
// Stub the launchd helpers: each export forwards its argument to the matching
// hoisted mock so individual tests can script results and inspect calls.
vi.mock("../../daemon/launchd.js", () => ({
launchAgentPlistExists: (env: Record<string, string | undefined>) => launchAgentPlistExists(env),
repairLaunchAgentBootstrap: (args: { env?: Record<string, string | undefined> }) =>
repairLaunchAgentBootstrap(args),
}));
vi.mock("./restart-health.js", () => ({
DEFAULT_RESTART_HEALTH_ATTEMPTS: 120,
DEFAULT_RESTART_HEALTH_DELAY_MS: 500,
@@ -93,12 +102,13 @@ vi.mock("./restart-health.js", () => ({
// Replace the lifecycle-core entry points with the module-level mocks so tests
// can script and inspect start/restart/stop. Uninstall is not exercised by the
// specs shown here, so it keeps an anonymous mock.
vi.mock("./lifecycle-core.js", () => ({
  runServiceRestart,
  // The shared mock (not a throwaway vi.fn()) so specs can drive onNotLoaded.
  runServiceStart,
  runServiceStop,
  runServiceUninstall: vi.fn(),
}));
describe("runDaemonRestart health checks", () => {
let runDaemonStart: (opts?: { json?: boolean }) => Promise<void>;
let runDaemonRestart: (opts?: { json?: boolean }) => Promise<boolean>;
let runDaemonStop: (opts?: { json?: boolean }) => Promise<void>;
let envSnapshot: ReturnType<typeof captureEnv>;
@@ -127,7 +137,7 @@ describe("runDaemonRestart health checks", () => {
}
// Load the module under test via dynamic import and keep the three lifecycle
// entry points (start, restart, stop) that the specs call.
beforeAll(async () => {
  ({ runDaemonStart, runDaemonRestart, runDaemonStop } = await import("./lifecycle.js"));
});
beforeEach(() => {
@@ -135,6 +145,7 @@ describe("runDaemonRestart health checks", () => {
delete process.env.OPENCLAW_CONTAINER_HINT;
service.readCommand.mockReset();
service.restart.mockReset();
runServiceStart.mockReset();
runServiceRestart.mockReset();
runServiceStop.mockReset();
waitForGatewayHealthyListener.mockReset();
@@ -149,12 +160,17 @@ describe("runDaemonRestart health checks", () => {
probeGateway.mockReset();
isRestartEnabled.mockReset();
loadConfig.mockReset();
launchAgentPlistExists.mockReset();
repairLaunchAgentBootstrap.mockReset();
service.readCommand.mockResolvedValue({
programArguments: ["openclaw", "gateway", "--port", "18789"],
environment: {},
});
service.restart.mockResolvedValue({ outcome: "completed" });
runServiceStart.mockResolvedValue(undefined);
launchAgentPlistExists.mockResolvedValue(false);
repairLaunchAgentBootstrap.mockResolvedValue({ ok: true });
runServiceRestart.mockImplementation(async (params: RestartParams) => {
const fail = (message: string, hints?: string[]) => {
@@ -175,6 +191,12 @@ describe("runDaemonRestart health checks", () => {
healthy: true,
portUsage: { port: 18789, status: "busy", listeners: [], hints: [] },
});
waitForGatewayHealthyRestart.mockResolvedValue({
healthy: true,
staleGatewayPids: [],
runtime: { status: "running" },
portUsage: { port: 18789, status: "busy", listeners: [], hints: [] },
});
probeGateway.mockResolvedValue({
ok: true,
configSnapshot: { commands: { restart: true } },
@@ -189,6 +211,19 @@ describe("runDaemonRestart health checks", () => {
vi.restoreAllMocks();
});
// Start path: the plist is reported as present and the mocked core start
// routine fires its onNotLoaded hook, so the launchd bootstrap repair is
// expected to run.
it("re-bootstraps an installed LaunchAgent when start finds it not loaded", async () => {
// Report "darwin" as the platform for the duration of this test.
vi.spyOn(process, "platform", "get").mockReturnValue("darwin");
launchAgentPlistExists.mockResolvedValue(true);
// Stand-in for the core start routine: it only invokes the not-loaded
// callback, if one was supplied.
runServiceStart.mockImplementation(async (params: { onNotLoaded?: () => Promise<unknown> }) => {
await params.onNotLoaded?.();
});
await runDaemonStart({ json: true });
// Both launchd helpers must have received the current process environment.
expect(launchAgentPlistExists).toHaveBeenCalledWith(process.env);
expect(repairLaunchAgentBootstrap).toHaveBeenCalledWith({ env: process.env });
});
it("kills stale gateway pids and retries restart", async () => {
const unhealthy: RestartHealthSnapshot = {
healthy: false,
@@ -307,6 +342,48 @@ describe("runDaemonRestart health checks", () => {
expect(service.restart).not.toHaveBeenCalled();
});
// Restart path with a verified gateway listener on the port: the listener is
// signalled directly and the launchd bootstrap repair must not run, even
// though the plist is reported as present.
it("prefers unmanaged restart over launchd repair when a gateway listener is present", async () => {
vi.spyOn(process, "platform", "get").mockReturnValue("darwin");
launchAgentPlistExists.mockResolvedValue(true);
// Exactly one verified listener pid is found.
findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([4200]);
// NOTE(review): mockUnmanagedRestart is defined outside this excerpt; it
// presumably scripts runServiceRestart to take the not-loaded branch and, with
// runPostRestartCheck, to run the post-restart health check — confirm there.
mockUnmanagedRestart({ runPostRestartCheck: true });
await runDaemonRestart({ json: true });
// The listener gets SIGUSR1 and only the listener-based health wait is used.
expect(signalVerifiedGatewayPidSync).toHaveBeenCalledWith(4200, "SIGUSR1");
expect(repairLaunchAgentBootstrap).not.toHaveBeenCalled();
expect(waitForGatewayHealthyListener).toHaveBeenCalledTimes(1);
expect(waitForGatewayHealthyRestart).not.toHaveBeenCalled();
});
// Restart path with no verified listener on the port: the launchd bootstrap
// repair is expected, followed by the service-level restart health wait
// rather than the listener-based one.
it("re-bootstraps an installed LaunchAgent on restart when no unmanaged listener exists", async () => {
vi.spyOn(process, "platform", "get").mockReturnValue("darwin");
launchAgentPlistExists.mockResolvedValue(true);
// No listener pids are found, so there is nothing to signal.
findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([]);
// Stand-in for the core restart routine: fire the not-loaded hook, then the
// post-restart check with a context whose fail() throws, and report success.
runServiceRestart.mockImplementation(
async (params: RestartParams & { onNotLoaded?: () => Promise<unknown> }) => {
await params.onNotLoaded?.();
await params.postRestartCheck?.({
json: Boolean(params.opts?.json),
stdout: process.stdout,
warnings: [],
fail: (message: string) => {
throw new Error(message);
},
});
return true;
},
);
await runDaemonRestart({ json: true });
// Repair ran with the process environment; no signal was sent and
// service.restart was not called.
expect(repairLaunchAgentBootstrap).toHaveBeenCalledWith({ env: process.env });
expect(signalVerifiedGatewayPidSync).not.toHaveBeenCalled();
expect(waitForGatewayHealthyListener).not.toHaveBeenCalled();
expect(waitForGatewayHealthyRestart).toHaveBeenCalledTimes(1);
expect(service.restart).not.toHaveBeenCalled();
});
it("fails unmanaged restart when multiple gateway listeners are present", async () => {
findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([4200, 4300]);
mockUnmanagedRestart();