import { describe, expect, it, vi } from "vitest"; import type { GatewayServer } from "../../gateway/server.impl.js"; import type { GatewayBonjourBeacon } from "../../infra/bonjour-discovery.js"; import { pickBeaconHost, pickGatewayPort } from "./discover.js"; const acquireGatewayLock = vi.fn(async (_opts?: { port?: number }) => ({ release: vi.fn(async () => {}), })); const consumeGatewayRestartIntentPayloadSync = vi.fn< () => { reason?: string; force?: boolean; waitMs?: number } | null >(() => null); const consumeGatewaySigusr1RestartIntent = vi.fn< () => { reason?: string; force?: boolean; waitMs?: number } | null >(() => null); const consumeGatewaySigusr1RestartAuthorization = vi.fn(() => true); const consumeGatewayRestartIntentSync = vi.fn(() => false); const isGatewaySigusr1RestartExternallyAllowed = vi.fn(() => false); const markGatewaySigusr1RestartHandled = vi.fn(); const peekGatewaySigusr1RestartReason = vi.fn<() => string | undefined>(() => undefined); const resetGatewayRestartStateForInProcessRestart = vi.fn(); const writeGatewayRestartHandoffSync = vi.fn((_opts: unknown) => ({ kind: "gateway-supervisor-restart-handoff" as const, version: 1 as const, intentId: "test-intent", pid: process.pid, createdAt: Date.now(), expiresAt: Date.now() + 60_000, source: "unknown" as const, restartKind: "full-process" as const, supervisorMode: "external" as const, })); const scheduleGatewaySigusr1Restart = vi.fn((_opts?: { delayMs?: number; reason?: string }) => ({ ok: true, pid: process.pid, signal: "SIGUSR1" as const, delayMs: 0, mode: "emit" as const, coalesced: false, cooldownMsApplied: 0, })); const getActiveTaskCount = vi.fn(() => 0); const getInspectableActiveTaskRestartBlockers = vi.fn( () => [] as Array<{ taskId: string; status: "queued" | "running"; runtime: "subagent" | "acp" | "cli" | "cron"; runId?: string; label?: string; title?: string; }>, ); const markGatewayDraining = vi.fn(); const waitForActiveTasks = vi.fn(async (_timeoutMs?: number) => ({ drained: true })); const resetAllLanes = vi.fn(); const reloadTaskRegistryFromStore = vi.fn(); const clearRuntimeConfigSnapshot = vi.fn(); const restartGatewayProcessWithFreshPid = vi.fn< (_opts?: { env?: NodeJS.ProcessEnv }) => { mode: "spawned" | "supervised" | "disabled" | "failed"; pid?: number; detail?: string; } >(() => ({ mode: "disabled" })); const respawnGatewayProcessForUpdate = vi.fn< (_opts?: { env?: NodeJS.ProcessEnv }) => { mode: "spawned" | "supervised" | "disabled" | "failed"; pid?: number; detail?: string; child?: { kill: () => void }; } >(() => ({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN" })); const markUpdateRestartSentinelFailure = vi.fn<(reason: string) => Promise>( async (_reason: string) => null, ); const abortEmbeddedAgentRun = vi.fn( (_sessionId?: string, _opts?: { mode?: "all" | "compacting" }) => false, ); const getActiveEmbeddedRunCount = vi.fn(() => 0); const listActiveEmbeddedRunSessionIds = vi.fn(() => [] as string[]); const listActiveEmbeddedRunSessionKeys = vi.fn(() => [] as string[]); const markRestartAbortedMainSessions = vi.fn(async (_params: unknown) => ({ marked: 1, skipped: 0, })); const waitForActiveEmbeddedRuns = vi.fn(async (_timeoutMs?: number) => ({ drained: true })); const DRAIN_TIMEOUT_LOG = "drain timeout reached; proceeding with restart"; const ACTIVE_RUN_DRAIN_TIMEOUT_LOG = "active embedded run drain timeout reached; aborting active run(s) before restart"; const DEFAULT_RESTART_DEFERRAL_TIMEOUT_MS = 300_000; const loadConfig = vi.fn<() => { gateway: { reload: { deferralTimeoutMs?: number } } }>(() => ({ gateway: { reload: { deferralTimeoutMs: 90_000, }, }, })); const gatewayLog = { info: vi.fn(), warn: vi.fn(), error: vi.fn(), }; vi.mock("../../infra/gateway-lock.js", () => ({ acquireGatewayLock: (opts?: { port?: number }) => acquireGatewayLock(opts), })); vi.mock("../../infra/restart.js", () => ({ consumeGatewayRestartIntentPayloadSync: () => consumeGatewayRestartIntentPayloadSync(), consumeGatewaySigusr1RestartIntent: () => consumeGatewaySigusr1RestartIntent(), consumeGatewaySigusr1RestartAuthorization: () => consumeGatewaySigusr1RestartAuthorization(), consumeGatewayRestartIntentSync: () => consumeGatewayRestartIntentSync(), isGatewaySigusr1RestartExternallyAllowed: () => isGatewaySigusr1RestartExternallyAllowed(), markGatewaySigusr1RestartHandled: () => markGatewaySigusr1RestartHandled(), peekGatewaySigusr1RestartReason: () => peekGatewaySigusr1RestartReason(), resetGatewayRestartStateForInProcessRestart: () => resetGatewayRestartStateForInProcessRestart(), resolveGatewayRestartDeferralTimeoutMs: (timeoutMs: unknown) => { if (typeof timeoutMs !== "number" || !Number.isFinite(timeoutMs)) { return DEFAULT_RESTART_DEFERRAL_TIMEOUT_MS; } if (timeoutMs <= 0) { return undefined; } return Math.floor(timeoutMs); }, scheduleGatewaySigusr1Restart: (opts?: { delayMs?: number; reason?: string }) => scheduleGatewaySigusr1Restart(opts), })); vi.mock("../../infra/process-respawn.js", () => ({ respawnGatewayProcessForUpdate: (opts?: { env?: NodeJS.ProcessEnv }) => respawnGatewayProcessForUpdate(opts), restartGatewayProcessWithFreshPid: (opts?: { env?: NodeJS.ProcessEnv }) => restartGatewayProcessWithFreshPid(opts), })); vi.mock("../../infra/restart-sentinel.js", () => ({ markUpdateRestartSentinelFailure: (reason: string) => markUpdateRestartSentinelFailure(reason), })); vi.mock("../../infra/restart-handoff.js", () => ({ writeGatewayRestartHandoffSync: (opts: unknown) => writeGatewayRestartHandoffSync(opts), })); vi.mock("../../process/command-queue.js", () => ({ getActiveTaskCount: () => getActiveTaskCount(), markGatewayDraining: () => markGatewayDraining(), waitForActiveTasks: (timeoutMs?: number) => waitForActiveTasks(timeoutMs), resetAllLanes: () => resetAllLanes(), })); vi.mock("../../tasks/runtime-internal.js", () => ({ reloadTaskRegistryFromStore: () => reloadTaskRegistryFromStore(), })); vi.mock("../../config/runtime-snapshot.js", () => ({ clearRuntimeConfigSnapshot: () => clearRuntimeConfigSnapshot(), })); vi.mock("../../tasks/task-registry.maintenance.js", () => ({ getInspectableActiveTaskRestartBlockers: () => getInspectableActiveTaskRestartBlockers(), })); vi.mock("../../agents/embedded-agent-runner/runs.js", () => ({ abortEmbeddedAgentRun: (sessionId?: string, opts?: { mode?: "all" | "compacting" }) => abortEmbeddedAgentRun(sessionId, opts), getActiveEmbeddedRunCount: () => getActiveEmbeddedRunCount(), listActiveEmbeddedRunSessionIds: () => listActiveEmbeddedRunSessionIds(), listActiveEmbeddedRunSessionKeys: () => listActiveEmbeddedRunSessionKeys(), waitForActiveEmbeddedRuns: (timeoutMs?: number) => waitForActiveEmbeddedRuns(timeoutMs), })); vi.mock("../../agents/main-session-restart-recovery.js", () => ({ markRestartAbortedMainSessions: (params: unknown) => markRestartAbortedMainSessions(params), })); vi.mock("../../config/config.js", () => ({ getRuntimeConfig: () => loadConfig(), loadConfig: () => loadConfig(), })); vi.mock("../../logging/subsystem.js", () => ({ createSubsystemLogger: () => gatewayLog, })); const LOOP_SIGNALS = ["SIGTERM", "SIGINT", "SIGUSR1"] as const; type LoopSignal = (typeof LOOP_SIGNALS)[number]; const originalPlatformDescriptor = Object.getOwnPropertyDescriptor(process, "platform"); function setPlatform(platform: string) { if (!originalPlatformDescriptor) { return; } Object.defineProperty(process, "platform", { ...originalPlatformDescriptor, value: platform, }); } function removeNewSignalListeners(signal: LoopSignal, existing: Set<(...args: unknown[]) => void>) { for (const listener of process.listeners(signal)) { const fn = listener as (...args: unknown[]) => void; if (!existing.has(fn)) { process.removeListener(signal, fn); } } } function addedSignalListener( signal: LoopSignal, existing: Set<(...args: unknown[]) => void>, ): (() => void) | null { const listeners = process.listeners(signal) as Array<(...args: unknown[]) => void>; for (let i = listeners.length - 1; i >= 0; i -= 1) { const listener = listeners[i]; if (listener && !existing.has(listener)) { return listener as () => void; } } return null; } async function withIsolatedSignals( run: (helpers: { captureSignal: (signal: LoopSignal) => () => void }) => Promise, ) { const existingListeners = Object.fromEntries( LOOP_SIGNALS.map((signal) => [ signal, new Set(process.listeners(signal) as Array<(...args: unknown[]) => void>), ]), ) as Record void>>; const captureSignal = (signal: LoopSignal) => { const listener = addedSignalListener(signal, existingListeners[signal]); if (!listener) { throw new Error(`expected new ${signal} listener`); } return () => listener(); }; try { await run({ captureSignal }); } finally { for (const signal of LOOP_SIGNALS) { removeNewSignalListeners(signal, existingListeners[signal]); } } } function createRuntimeWithExitSignal(exitCallOrder?: string[]) { let resolveExit: (code: number) => void = () => {}; const exited = new Promise((resolve) => { resolveExit = resolve; }); const runtime = { log: vi.fn(), error: vi.fn(), exit: vi.fn((code: number) => { exitCallOrder?.push("exit"); resolveExit(code); }), }; return { runtime, exited }; } type GatewayCloseFn = GatewayServer["close"]; type LoopRuntime = { log: (...args: unknown[]) => void; error: (...args: unknown[]) => void; exit: (code: number) => void; }; function createCloseMock() { return vi.fn(async (_opts) => {}); } function expectRestartCloseCall( close: ReturnType, maxDrainTimeoutMs: number, ) { expect(close).toHaveBeenCalledWith( expect.objectContaining({ reason: "gateway restarting", restartExpectedMs: 1500, drainTimeoutMs: expect.any(Number), }), ); const closeArgs = close.mock.calls[0]?.[0]; expect(closeArgs?.drainTimeoutMs).toBeLessThanOrEqual(maxDrainTimeoutMs); expect(closeArgs?.drainTimeoutMs).toBeGreaterThanOrEqual(0); } function createSignaledStart(close: GatewayCloseFn) { let resolveStarted: (() => void) | null = null; const started = new Promise((resolve) => { resolveStarted = resolve; }); const start = vi.fn(async () => { resolveStarted?.(); return { close }; }); return { start, started }; } async function runLoopWithStart(params: { start: ReturnType; runtime: LoopRuntime; lockPort?: number; healthHost?: string; waitForHealthyChild?: (port: number, pid?: number, host?: string) => Promise; }) { vi.resetModules(); const { runGatewayLoop } = await import("./run-loop.js"); const loopPromise = runGatewayLoop({ start: params.start as unknown as Parameters[0]["start"], runtime: params.runtime, lockPort: params.lockPort, healthHost: params.healthHost, waitForHealthyChild: params.waitForHealthyChild, }); return { loopPromise }; } async function waitForStart(started: Promise) { await started; await new Promise((resolve) => { setImmediate(resolve); }); } async function waitForLoopCondition(predicate: () => boolean, message: string) { const deadline = Date.now() + 1_000; while (Date.now() < deadline) { if (predicate()) { return; } await new Promise((resolve) => { setImmediate(resolve); }); } throw new Error(message); } async function createSignaledLoopHarness(exitCallOrder?: string[]) { const close = createCloseMock(); const { start, started } = createSignaledStart(close); const { runtime, exited } = createRuntimeWithExitSignal(exitCallOrder); const { loopPromise } = await runLoopWithStart({ start, runtime }); await waitForStart(started); return { close, start, runtime, exited, loopPromise }; } function expectRestartHandoffCall(expected: { restartKind: "full-process" | "update-process"; reason: string | undefined; supervisorMode: "external" | "launchd"; }) { expect(writeGatewayRestartHandoffSync).toHaveBeenCalledTimes(1); const [handoff] = writeGatewayRestartHandoffSync.mock.calls[0] ?? []; if (!handoff || typeof handoff !== "object" || Array.isArray(handoff)) { throw new Error("expected restart handoff options object"); } const processInstanceId = (handoff as { processInstanceId?: unknown }).processInstanceId; expect(typeof processInstanceId).toBe("string"); if (typeof processInstanceId !== "string") { throw new Error("expected restart handoff processInstanceId string"); } expect(processInstanceId).toMatch( /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/, ); expect(handoff).toEqual({ ...expected, processInstanceId, }); } describe("runGatewayLoop", () => { it("exits 0 on SIGTERM after graceful close", async () => { vi.clearAllMocks(); await withIsolatedSignals(async ({ captureSignal }) => { const { close, runtime, exited } = await createSignaledLoopHarness(); const sigterm = captureSignal("SIGTERM"); sigterm(); await expect(exited).resolves.toBe(0); expect(close).toHaveBeenCalledWith({ reason: "gateway stopping", restartExpectedMs: null, }); expect(runtime.exit).toHaveBeenCalledWith(0); }); }); it("treats SIGTERM with a restart intent as a draining restart", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockReturnValueOnce({}); getActiveTaskCount.mockReturnValueOnce(1).mockReturnValue(0); await withIsolatedSignals(async ({ captureSignal }) => { const closeFirst = createCloseMock(); const closeSecond = createCloseMock(); const { runtime, exited } = createRuntimeWithExitSignal(); let resolveSecond: (() => void) | null = null; const startedSecond = new Promise((resolve) => { resolveSecond = resolve; }); const start = vi .fn() .mockResolvedValueOnce({ close: closeFirst }) .mockImplementationOnce(async () => { resolveSecond?.(); return { close: closeSecond }; }); const { runGatewayLoop } = await import("./run-loop.js"); void runGatewayLoop({ start: start as unknown as Parameters[0]["start"], runtime: runtime as unknown as Parameters[0]["runtime"], }); await new Promise((resolve) => { setImmediate(resolve); }); const sigterm = captureSignal("SIGTERM"); const sigint = captureSignal("SIGINT"); sigterm(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(consumeGatewayRestartIntentPayloadSync).toHaveBeenCalledOnce(); expect(markGatewayDraining).toHaveBeenCalledOnce(); expect(waitForActiveTasks).toHaveBeenCalledWith(90_000); expectRestartCloseCall(closeFirst, 90_000); await startedSecond; expect(start).toHaveBeenCalledTimes(2); await new Promise((resolve) => { setImmediate(resolve); }); sigint(); await expect(exited).resolves.toBe(0); expect(closeSecond).toHaveBeenCalledWith({ reason: "gateway stopping", restartExpectedMs: null, }); }); }); it("uses restart intent wait overrides for SIGTERM drain", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockReturnValueOnce({ waitMs: 2_500 }); getActiveTaskCount.mockReturnValueOnce(1).mockReturnValue(0); getActiveEmbeddedRunCount.mockReturnValueOnce(1).mockReturnValue(0); await withIsolatedSignals(async ({ captureSignal }) => { const { start, exited } = await createSignaledLoopHarness(); const sigterm = captureSignal("SIGTERM"); const sigint = captureSignal("SIGINT"); sigterm(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(waitForActiveTasks).toHaveBeenCalledWith(2_500); expect(waitForActiveEmbeddedRuns).toHaveBeenCalledWith(2_500); expect(start).toHaveBeenCalledTimes(2); sigint(); await expect(exited).resolves.toBe(0); }); }); it("caps reply drain time for unbounded SIGTERM restarts", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockReturnValueOnce({ waitMs: 0 }); await withIsolatedSignals(async ({ captureSignal }) => { const { close, start, exited } = await createSignaledLoopHarness(); const sigterm = captureSignal("SIGTERM"); const sigint = captureSignal("SIGINT"); sigterm(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expectRestartCloseCall(close, 15_000); expect(start).toHaveBeenCalledTimes(2); sigint(); await expect(exited).resolves.toBe(0); }); }); it("waits indefinitely for active embedded runs on unbounded restarts", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockReturnValueOnce({ waitMs: 0 }); getActiveEmbeddedRunCount.mockReturnValueOnce(1).mockReturnValue(0); waitForActiveEmbeddedRuns.mockResolvedValueOnce({ drained: true }); await withIsolatedSignals(async ({ captureSignal }) => { const { close, start, exited } = await createSignaledLoopHarness(); const sigterm = captureSignal("SIGTERM"); const sigint = captureSignal("SIGINT"); sigterm(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(waitForActiveEmbeddedRuns).toHaveBeenCalledWith(undefined); expect(abortEmbeddedAgentRun).toHaveBeenCalledWith(undefined, { mode: "compacting" }); expect(abortEmbeddedAgentRun).not.toHaveBeenCalledWith(undefined, { mode: "all" }); expectRestartCloseCall(close, 15_000); expect(start).toHaveBeenCalledTimes(2); sigint(); await expect(exited).resolves.toBe(0); }); }); it("waits indefinitely for active embedded runs when reload config disables deferral timeout", async () => { vi.clearAllMocks(); loadConfig.mockReturnValueOnce({ gateway: { reload: { deferralTimeoutMs: 0, }, }, }); getActiveEmbeddedRunCount.mockReturnValueOnce(1).mockReturnValue(0); waitForActiveEmbeddedRuns.mockResolvedValueOnce({ drained: true }); await withIsolatedSignals(async ({ captureSignal }) => { const { close, start, exited } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); const sigint = captureSignal("SIGINT"); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(waitForActiveEmbeddedRuns).toHaveBeenCalledWith(undefined); expect(abortEmbeddedAgentRun).toHaveBeenCalledWith(undefined, { mode: "compacting" }); expect(abortEmbeddedAgentRun).not.toHaveBeenCalledWith(undefined, { mode: "all" }); expectRestartCloseCall(close, 15_000); expect(start).toHaveBeenCalledTimes(2); sigint(); await expect(exited).resolves.toBe(0); }); }); it("uses the restart drain timeout for active embedded runs before aborting", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockReturnValueOnce({}); getActiveTaskCount.mockReturnValueOnce(1).mockReturnValue(0); getActiveEmbeddedRunCount.mockReturnValueOnce(1).mockReturnValue(0); listActiveEmbeddedRunSessionIds.mockReturnValueOnce(["session-embedded-timeout"]); listActiveEmbeddedRunSessionKeys.mockReturnValueOnce(["agent:main:embedded-timeout"]); waitForActiveTasks.mockResolvedValueOnce({ drained: false }); waitForActiveEmbeddedRuns.mockResolvedValueOnce({ drained: false }); markRestartAbortedMainSessions.mockRejectedValueOnce(new Error("store read-only")); await withIsolatedSignals(async ({ captureSignal }) => { const { close, start, exited } = await createSignaledLoopHarness(); const sigterm = captureSignal("SIGTERM"); const sigint = captureSignal("SIGINT"); sigterm(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(waitForActiveTasks).toHaveBeenCalledWith(90_000); expect(waitForActiveEmbeddedRuns).toHaveBeenCalledWith(90_000); expect(abortEmbeddedAgentRun).toHaveBeenCalledWith(undefined, { mode: "compacting" }); expect(abortEmbeddedAgentRun).toHaveBeenCalledWith(undefined, { mode: "all" }); expect(gatewayLog.warn).toHaveBeenCalledWith(ACTIVE_RUN_DRAIN_TIMEOUT_LOG); expect(gatewayLog.warn).toHaveBeenCalledWith(DRAIN_TIMEOUT_LOG); expect(markRestartAbortedMainSessions).toHaveBeenCalledWith({ cfg: { gateway: { reload: { deferralTimeoutMs: 90_000, }, }, }, sessionIds: new Set(["session-embedded-timeout"]), sessionKeys: new Set(["agent:main:embedded-timeout"]), reason: "gateway restart drain timeout", }); expect(gatewayLog.warn).toHaveBeenCalledWith( "failed to mark interrupted main sessions for restart recovery: Error: store read-only", ); expectRestartCloseCall(close, 90_000); expect(start).toHaveBeenCalledTimes(2); sigint(); await expect(exited).resolves.toBe(0); }); }); it("skips a second active-work drain after a SIGUSR1 deferral timeout intent", async () => { vi.clearAllMocks(); consumeGatewaySigusr1RestartIntent.mockReturnValueOnce({ force: true, reason: "config reload forced restart", }); getActiveTaskCount.mockReturnValueOnce(1).mockReturnValue(0); getActiveEmbeddedRunCount.mockReturnValueOnce(1).mockReturnValue(0); listActiveEmbeddedRunSessionIds.mockReturnValueOnce(["session-deferral-timeout"]); listActiveEmbeddedRunSessionKeys.mockReturnValueOnce(["agent:main:deferral-timeout"]); await withIsolatedSignals(async ({ captureSignal }) => { const { close, start, exited } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); const sigint = captureSignal("SIGINT"); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(waitForActiveTasks).not.toHaveBeenCalled(); expect(waitForActiveEmbeddedRuns).not.toHaveBeenCalled(); expect(abortEmbeddedAgentRun).toHaveBeenCalledWith(undefined, { mode: "compacting" }); expect(abortEmbeddedAgentRun).toHaveBeenCalledWith(undefined, { mode: "all" }); expect(markRestartAbortedMainSessions).toHaveBeenCalledWith({ cfg: { gateway: { reload: { deferralTimeoutMs: 90_000, }, }, }, sessionIds: new Set(["session-deferral-timeout"]), sessionKeys: new Set(["agent:main:deferral-timeout"]), reason: "config reload forced restart", }); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledOnce(); expectRestartCloseCall(close, 0); expect(start).toHaveBeenCalledTimes(2); sigint(); await expect(exited).resolves.toBe(0); }); }); it("forces SIGTERM restarts without waiting for active task drain", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockReturnValueOnce({ force: true }); getActiveTaskCount.mockReturnValueOnce(1).mockReturnValue(0); getActiveEmbeddedRunCount.mockReturnValueOnce(1).mockReturnValue(0); listActiveEmbeddedRunSessionIds.mockReturnValueOnce(["session-forced-task"]); listActiveEmbeddedRunSessionKeys.mockReturnValueOnce(["agent:main:forced-task"]); const forceTaskBlockers = [ { taskId: "task-force", runId: "run-force", status: "running" as const, runtime: "cron" as const, label: "forced", }, ]; getInspectableActiveTaskRestartBlockers .mockReturnValueOnce(forceTaskBlockers) .mockReturnValueOnce(forceTaskBlockers) .mockReturnValueOnce(forceTaskBlockers); await withIsolatedSignals(async ({ captureSignal }) => { const { start, exited } = await createSignaledLoopHarness(); const sigterm = captureSignal("SIGTERM"); const sigint = captureSignal("SIGINT"); sigterm(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(waitForActiveTasks).not.toHaveBeenCalled(); expect(waitForActiveEmbeddedRuns).not.toHaveBeenCalled(); expect(abortEmbeddedAgentRun).toHaveBeenCalledWith(undefined, { mode: "all" }); expect(markRestartAbortedMainSessions).toHaveBeenCalledWith({ cfg: { gateway: { reload: { deferralTimeoutMs: 90_000, }, }, }, sessionIds: new Set(["session-forced-task"]), sessionKeys: new Set(["agent:main:forced-task"]), reason: "forced gateway restart", }); expect(gatewayLog.warn).toHaveBeenCalledWith( "restart blocked by active background task run(s): taskId=task-force runId=run-force status=running runtime=cron label=forced", ); expect(gatewayLog.warn).toHaveBeenCalledWith( "forced restart requested; skipping active work drain", ); expect(start).toHaveBeenCalledTimes(2); sigint(); await expect(exited).resolves.toBe(0); }); }); it("restarts after SIGUSR1 even when drain times out, and resets runtime state for the new iteration", async () => { vi.clearAllMocks(); loadConfig.mockReturnValue({ gateway: { reload: { deferralTimeoutMs: 1_234, }, }, }); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); respawnGatewayProcessForUpdate.mockReturnValue({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN", }); markUpdateRestartSentinelFailure.mockClear(); await withIsolatedSignals(async ({ captureSignal }) => { getActiveTaskCount.mockReturnValueOnce(2).mockReturnValueOnce(0); getActiveEmbeddedRunCount.mockReturnValueOnce(1).mockReturnValueOnce(0); listActiveEmbeddedRunSessionIds.mockReturnValueOnce(["session-issue-82433"]); listActiveEmbeddedRunSessionKeys.mockReturnValueOnce(["agent:main:issue-82433"]); waitForActiveTasks.mockResolvedValueOnce({ drained: false }); waitForActiveEmbeddedRuns.mockResolvedValueOnce({ drained: true }); type StartServer = () => Promise<{ close: GatewayCloseFn; }>; const closeFirst = createCloseMock(); const closeSecond = createCloseMock(); const closeThird = createCloseMock(); const { runtime, exited } = createRuntimeWithExitSignal(); const start = vi.fn(); let resolveFirst: (() => void) | null = null; const startedFirst = new Promise((resolve) => { resolveFirst = resolve; }); start.mockImplementationOnce(async () => { resolveFirst?.(); return { close: closeFirst }; }); let resolveSecond: (() => void) | null = null; const startedSecond = new Promise((resolve) => { resolveSecond = resolve; }); start.mockImplementationOnce(async () => { resolveSecond?.(); return { close: closeSecond }; }); let resolveThird: (() => void) | null = null; const startedThird = new Promise((resolve) => { resolveThird = resolve; }); start.mockImplementationOnce(async () => { resolveThird?.(); return { close: closeThird }; }); const { runGatewayLoop } = await import("./run-loop.js"); void runGatewayLoop({ start: start as unknown as Parameters[0]["start"], runtime: runtime as unknown as Parameters[0]["runtime"], }); await startedFirst; const sigusr1 = captureSignal("SIGUSR1"); const sigterm = captureSignal("SIGTERM"); expect(start).toHaveBeenCalledTimes(1); await new Promise((resolve) => { setImmediate(resolve); }); sigusr1(); await startedSecond; expect(start).toHaveBeenCalledTimes(2); await new Promise((resolve) => { setImmediate(resolve); }); expect(abortEmbeddedAgentRun).toHaveBeenCalledWith(undefined, { mode: "compacting" }); expect(waitForActiveTasks).toHaveBeenCalledWith(1_234); expect(waitForActiveEmbeddedRuns).toHaveBeenCalledWith(1_234); expect(abortEmbeddedAgentRun).toHaveBeenCalledWith(undefined, { mode: "all" }); expect(markRestartAbortedMainSessions).toHaveBeenCalledWith({ cfg: { gateway: { reload: { deferralTimeoutMs: 1_234, }, }, }, sessionIds: new Set(["session-issue-82433"]), sessionKeys: new Set(["agent:main:issue-82433"]), reason: "gateway restart drain timeout", }); expect(markGatewayDraining).toHaveBeenCalledTimes(1); expect(gatewayLog.warn).toHaveBeenCalledWith(DRAIN_TIMEOUT_LOG); expectRestartCloseCall(closeFirst, 1_234); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(1); expect(resetAllLanes).toHaveBeenCalledTimes(1); expect(clearRuntimeConfigSnapshot).toHaveBeenCalledTimes(1); expect(resetGatewayRestartStateForInProcessRestart).toHaveBeenCalledTimes(1); expect(reloadTaskRegistryFromStore).toHaveBeenCalledTimes(1); sigusr1(); await startedThird; await new Promise((resolve) => { setImmediate(resolve); }); expectRestartCloseCall(closeSecond, 1_234); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(2); expect(markGatewayDraining).toHaveBeenCalledTimes(2); expect(resetAllLanes).toHaveBeenCalledTimes(2); expect(clearRuntimeConfigSnapshot).toHaveBeenCalledTimes(2); expect(resetGatewayRestartStateForInProcessRestart).toHaveBeenCalledTimes(2); expect(reloadTaskRegistryFromStore).toHaveBeenCalledTimes(2); expect(acquireGatewayLock).toHaveBeenCalledTimes(3); sigterm(); await expect(exited).resolves.toBe(0); expect(closeThird).toHaveBeenCalledWith({ reason: "gateway stopping", restartExpectedMs: null, }); }); }); it("queues SIGUSR1 received before the run-loop installs its restart waiter", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); respawnGatewayProcessForUpdate.mockReturnValue({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN", }); await withIsolatedSignals(async ({ captureSignal }) => { const closeFirst = createCloseMock(); const closeSecond = createCloseMock(); const { runtime, exited } = createRuntimeWithExitSignal(); let releaseFirstStart!: () => void; const firstStartMayReturn = new Promise((resolve) => { releaseFirstStart = resolve; }); let sigusr1: (() => void) | null = null; let resolveSecondStart: (() => void) | null = null; const startedSecond = new Promise((resolve) => { resolveSecondStart = resolve; }); const start = vi.fn(); start.mockImplementationOnce(async () => { await firstStartMayReturn; sigusr1?.(); await waitForLoopCondition( () => markGatewaySigusr1RestartHandled.mock.calls.length > 0, "expected SIGUSR1 handler to consume the restart before startup returned", ); await waitForLoopCondition( () => markGatewayDraining.mock.calls.length > 0, "expected queued startup restart to mark gateway draining before startup returned", ); return { close: closeFirst }; }); start.mockImplementationOnce(async () => { resolveSecondStart?.(); return { close: closeSecond }; }); const { runGatewayLoop } = await import("./run-loop.js"); void runGatewayLoop({ start: start as unknown as Parameters[0]["start"], runtime: runtime as unknown as Parameters[0]["runtime"], }); await new Promise((resolve) => { setImmediate(resolve); }); sigusr1 = captureSignal("SIGUSR1"); const sigterm = captureSignal("SIGTERM"); try { releaseFirstStart(); await waitForLoopCondition( () => start.mock.calls.length >= 2, "expected queued SIGUSR1 to trigger the second gateway start", ); await startedSecond; expectRestartCloseCall(closeFirst, 90_000); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(1); expect(markGatewayDraining).toHaveBeenCalledTimes(1); expect(resetAllLanes).toHaveBeenCalledTimes(1); expect(resetGatewayRestartStateForInProcessRestart).toHaveBeenCalledTimes(1); expect(reloadTaskRegistryFromStore).toHaveBeenCalledTimes(1); } finally { sigterm(); await expect(exited).resolves.toBe(0); } }); }); it("exits if a queued startup restart never reaches a close handle", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); vi.useFakeTimers(); try { await withIsolatedSignals(async ({ captureSignal }) => { const close = vi.fn(async () => {}); const startupNeverReturns = new Promise(() => {}); const { runtime, exited } = createRuntimeWithExitSignal(); const start = vi.fn(async () => { await startupNeverReturns; return { close }; }); const { runGatewayLoop } = await import("./run-loop.js"); void runGatewayLoop({ start: start as unknown as Parameters[0]["start"], runtime: runtime as unknown as Parameters[0]["runtime"], }); await vi.advanceTimersByTimeAsync(0); const sigusr1 = captureSignal("SIGUSR1"); sigusr1(); await vi.advanceTimersByTimeAsync(0); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(1); expect(markGatewayDraining).toHaveBeenCalledTimes(1); expect(runtime.exit).not.toHaveBeenCalled(); await vi.advanceTimersByTimeAsync(24_999); expect(runtime.exit).not.toHaveBeenCalled(); await vi.advanceTimersByTimeAsync(1); await expect(exited).resolves.toBe(1); expect(close).not.toHaveBeenCalled(); expect(start).toHaveBeenCalledTimes(1); expect(gatewayLog.error).toHaveBeenCalledWith( "startup restart request timed out before gateway returned a close handle; exiting for supervisor recovery", ); }); } finally { vi.useRealTimers(); } }); it("processes SIGINT immediately before startup returns a server", async () => { vi.clearAllMocks(); await withIsolatedSignals(async ({ captureSignal }) => { const close = vi.fn(async () => {}); const startupNeverReturns = new Promise(() => {}); const { runtime, exited } = createRuntimeWithExitSignal(); const start = vi.fn(async () => { await startupNeverReturns; return { close }; }); const { runGatewayLoop } = await import("./run-loop.js"); void runGatewayLoop({ start: start as unknown as Parameters[0]["start"], runtime: runtime as unknown as Parameters[0]["runtime"], }); await new Promise((resolve) => { setImmediate(resolve); }); const sigint = captureSignal("SIGINT"); sigint(); await expect(exited).resolves.toBe(0); expect(close).not.toHaveBeenCalled(); expect(start).toHaveBeenCalledTimes(1); expect(acquireGatewayLock).toHaveBeenCalledTimes(1); }); }); it("lets SIGINT override a queued startup restart before startup returns a server", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); await withIsolatedSignals(async ({ captureSignal }) => { const close = vi.fn(async () => {}); const startupNeverReturns = new Promise(() => {}); const { runtime, exited } = createRuntimeWithExitSignal(); const start = vi.fn(async () => { await startupNeverReturns; return { close }; }); const { runGatewayLoop } = await import("./run-loop.js"); void runGatewayLoop({ start: start as unknown as Parameters[0]["start"], runtime: runtime as unknown as Parameters[0]["runtime"], }); await new Promise((resolve) => { setImmediate(resolve); }); const sigusr1 = captureSignal("SIGUSR1"); const sigint = captureSignal("SIGINT"); sigusr1(); await waitForLoopCondition( () => markGatewaySigusr1RestartHandled.mock.calls.length > 0, "expected startup SIGUSR1 to be queued", ); sigint(); await expect(exited).resolves.toBe(0); expect(close).not.toHaveBeenCalled(); expect(markGatewayDraining).toHaveBeenCalledTimes(1); expect(start).toHaveBeenCalledTimes(1); expect(acquireGatewayLock).toHaveBeenCalledTimes(1); expect(gatewayLog.info).toHaveBeenCalledWith( "received SIGINT; overriding pending startup restart with shutdown", ); }); }); it("processes queued SIGUSR1 when restart startup fails before returning a server", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); respawnGatewayProcessForUpdate.mockReturnValue({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN", }); await withIsolatedSignals(async ({ captureSignal }) => { const closeFirst = createCloseMock(); const closeThird = createCloseMock(); const { runtime, exited } = createRuntimeWithExitSignal(); let sigusr1: (() => void) | null = null; let resolveThirdStart: (() => void) | null = null; const startedThird = new Promise((resolve) => { resolveThirdStart = resolve; }); const start = vi.fn(); start.mockResolvedValueOnce({ close: closeFirst }); start.mockImplementationOnce(async () => { sigusr1?.(); await waitForLoopCondition( () => markGatewaySigusr1RestartHandled.mock.calls.length >= 2, "expected SIGUSR1 during failed startup to be accepted before startup throws", ); throw new Error("restart startup failed"); }); start.mockImplementationOnce(async () => { resolveThirdStart?.(); return { close: closeThird }; }); const { runGatewayLoop } = await import("./run-loop.js"); void runGatewayLoop({ start: start as unknown as Parameters[0]["start"], runtime: runtime as unknown as Parameters[0]["runtime"], }); await new Promise((resolve) => { setImmediate(resolve); }); sigusr1 = captureSignal("SIGUSR1"); const sigterm = captureSignal("SIGTERM"); try { sigusr1(); await waitForLoopCondition( () => start.mock.calls.length >= 3, "expected queued SIGUSR1 to advance past failed restart startup", ); await startedThird; expectRestartCloseCall(closeFirst, 90_000); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(2); expect(markGatewayDraining).toHaveBeenCalledTimes(2); expect(resetAllLanes).toHaveBeenCalledTimes(2); expect(resetGatewayRestartStateForInProcessRestart).toHaveBeenCalledTimes(2); expect(reloadTaskRegistryFromStore).toHaveBeenCalledTimes(2); expect(acquireGatewayLock).toHaveBeenCalledTimes(3); expect(gatewayLog.error).toHaveBeenCalledWith( expect.stringContaining("gateway startup failed: restart startup failed."), ); } finally { sigterm(); await expect(exited).resolves.toBe(0); } }); }); it("processes SIGUSR1 received after restart startup fails before returning a server", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); respawnGatewayProcessForUpdate.mockReturnValue({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN", }); await withIsolatedSignals(async ({ captureSignal }) => { const closeFirst = createCloseMock(); const closeThird = createCloseMock(); const { runtime, exited } = createRuntimeWithExitSignal(); let resolveThirdStart: (() => void) | null = null; const startedThird = new Promise((resolve) => { resolveThirdStart = resolve; }); const start = vi.fn(); start.mockResolvedValueOnce({ close: closeFirst }); start.mockRejectedValueOnce(new Error("restart startup failed")); start.mockImplementationOnce(async () => { resolveThirdStart?.(); return { close: closeThird }; }); const { runGatewayLoop } = await import("./run-loop.js"); void runGatewayLoop({ start: start as unknown as Parameters[0]["start"], runtime: runtime as unknown as Parameters[0]["runtime"], }); await new Promise((resolve) => { setImmediate(resolve); }); const sigusr1 = captureSignal("SIGUSR1"); const sigterm = captureSignal("SIGTERM"); try { sigusr1(); await waitForLoopCondition( () => gatewayLog.error.mock.calls.some(([message]) => String(message).includes("gateway startup failed: restart startup failed."), ), "expected failed restart startup to be logged", ); await new Promise((resolve) => { setImmediate(resolve); }); expect(start).toHaveBeenCalledTimes(2); sigusr1(); await waitForLoopCondition( () => start.mock.calls.length >= 3, "expected post-failure SIGUSR1 to retry gateway startup", ); await startedThird; expectRestartCloseCall(closeFirst, 90_000); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(2); expect(markGatewayDraining).toHaveBeenCalledTimes(2); expect(resetAllLanes).toHaveBeenCalledTimes(2); expect(resetGatewayRestartStateForInProcessRestart).toHaveBeenCalledTimes(2); expect(reloadTaskRegistryFromStore).toHaveBeenCalledTimes(2); expect(acquireGatewayLock).toHaveBeenCalledTimes(3); } finally { sigterm(); await expect(exited).resolves.toBe(0); } }); }); it("uses the default restart drain timeout when config omits deferralTimeoutMs", async () => { vi.clearAllMocks(); loadConfig.mockReturnValue({ gateway: { reload: {} } }); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); respawnGatewayProcessForUpdate.mockReturnValue({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN", }); await withIsolatedSignals(async ({ captureSignal }) => { getActiveTaskCount.mockReturnValueOnce(1).mockReturnValue(0); getActiveEmbeddedRunCount.mockReturnValueOnce(1).mockReturnValue(0); const { start } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(waitForActiveTasks).toHaveBeenCalledWith(DEFAULT_RESTART_DEFERRAL_TIMEOUT_MS); expect(waitForActiveEmbeddedRuns).toHaveBeenCalledWith(DEFAULT_RESTART_DEFERRAL_TIMEOUT_MS); expect(markGatewayDraining).toHaveBeenCalledOnce(); expect(start).toHaveBeenCalledTimes(2); }); }); it("clears stale restart state before routing external SIGUSR1 through the scheduler", async () => { vi.clearAllMocks(); consumeGatewaySigusr1RestartAuthorization.mockReturnValueOnce(false); isGatewaySigusr1RestartExternallyAllowed.mockReturnValueOnce(true); await withIsolatedSignals(async ({ captureSignal }) => { const { close, start } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); expect(scheduleGatewaySigusr1Restart).toHaveBeenCalledWith({ delayMs: 0, reason: "SIGUSR1", }); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(1); expect(markGatewaySigusr1RestartHandled.mock.invocationCallOrder[0]).toBeLessThan( scheduleGatewaySigusr1Restart.mock.invocationCallOrder[0] ?? 0, ); expect(close).not.toHaveBeenCalled(); expect(start).toHaveBeenCalledTimes(1); }); }); it("clears the in-flight restart token when an unauthorized SIGUSR1 is ignored", async () => { vi.clearAllMocks(); consumeGatewaySigusr1RestartAuthorization.mockReturnValueOnce(false); isGatewaySigusr1RestartExternallyAllowed.mockReturnValueOnce(false); await withIsolatedSignals(async ({ captureSignal }) => { const { close, start } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(1); expect(scheduleGatewaySigusr1Restart).not.toHaveBeenCalled(); expect(close).not.toHaveBeenCalled(); expect(start).toHaveBeenCalledTimes(1); expect(gatewayLog.warn).toHaveBeenCalledWith( "SIGUSR1 restart ignored (not authorized; commands.restart=false or use gateway tool).", ); expect(gatewayLog.warn).toHaveBeenCalledTimes(2); expect(gatewayLog.warn).toHaveBeenNthCalledWith( 2, "An unauthorized SIGUSR1 restart signal was received and ignored. " + "If a pending gateway restart needs to be applied, run `openclaw gateway restart` " + "or restart the gateway through your service manager.", ); }); }); it("clears the in-flight restart token when a file intent handles authorized SIGUSR1", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockReturnValueOnce({ force: true, reason: "file-intent restart", }); loadConfig.mockReturnValueOnce({ gateway: { reload: { deferralTimeoutMs: 90_000, }, }, }); getActiveEmbeddedRunCount.mockReturnValueOnce(1).mockReturnValue(0); listActiveEmbeddedRunSessionIds.mockReturnValueOnce(["session-file-intent"]); listActiveEmbeddedRunSessionKeys.mockReturnValueOnce(["agent:main:file-intent"]); await withIsolatedSignals(async ({ captureSignal }) => { const { start, exited } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); const sigint = captureSignal("SIGINT"); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(consumeGatewaySigusr1RestartAuthorization).toHaveBeenCalledOnce(); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledOnce(); expect(markRestartAbortedMainSessions).toHaveBeenCalledWith({ cfg: { gateway: { reload: { deferralTimeoutMs: 90_000, }, }, }, sessionIds: new Set(["session-file-intent"]), sessionKeys: new Set(["agent:main:file-intent"]), reason: "file-intent restart", }); expect(start).toHaveBeenCalledTimes(2); sigint(); await expect(exited).resolves.toBe(0); }); }); it("releases the lock before exiting on spawned restart", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); const originalTraceEnv = process.env.OPENCLAW_GATEWAY_RESTART_TRACE; process.env.OPENCLAW_GATEWAY_RESTART_TRACE = "1"; try { await withIsolatedSignals(async ({ captureSignal }) => { const lockRelease = vi.fn(async () => {}); acquireGatewayLock.mockResolvedValueOnce({ release: lockRelease, }); // Override process-respawn to return "spawned" mode restartGatewayProcessWithFreshPid.mockReturnValueOnce({ mode: "spawned", pid: 9999, }); const exitCallOrder: string[] = []; const { runtime, exited } = await createSignaledLoopHarness(exitCallOrder); const sigusr1 = captureSignal("SIGUSR1"); lockRelease.mockImplementation(async () => { exitCallOrder.push("lockRelease"); }); sigusr1(); await exited; expect(lockRelease).toHaveBeenCalledTimes(1); expect(runtime.exit).toHaveBeenCalledWith(0); expect(exitCallOrder).toEqual(["lockRelease", "exit"]); const [respawnOpts] = restartGatewayProcessWithFreshPid.mock.calls[0] ?? []; expect(respawnOpts?.env?.OPENCLAW_GATEWAY_RESTART_TRACE_STARTED_AT_MS).toMatch(/^\d/u); expect(respawnOpts?.env?.OPENCLAW_GATEWAY_RESTART_TRACE_LAST_AT_MS).toMatch(/^\d/u); expect(writeGatewayRestartHandoffSync).not.toHaveBeenCalled(); }); } finally { if (originalTraceEnv === undefined) { delete process.env.OPENCLAW_GATEWAY_RESTART_TRACE; } else { process.env.OPENCLAW_GATEWAY_RESTART_TRACE = originalTraceEnv; } } }); it("waits briefly before exiting on launchd supervised restart", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); try { setPlatform("darwin"); process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway"; restartGatewayProcessWithFreshPid.mockReturnValueOnce({ mode: "supervised", }); await withIsolatedSignals(async ({ captureSignal }) => { const { runtime, exited } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); vi.useFakeTimers(); sigusr1(); await vi.advanceTimersByTimeAsync(1499); expect(runtime.exit).not.toHaveBeenCalled(); await vi.advanceTimersByTimeAsync(1); await expect(exited).resolves.toBe(0); expect(runtime.exit).toHaveBeenCalledWith(0); expectRestartHandoffCall({ restartKind: "full-process", reason: undefined, supervisorMode: "launchd", }); }); } finally { vi.useRealTimers(); delete process.env.OPENCLAW_LAUNCHD_LABEL; if (originalPlatformDescriptor) { Object.defineProperty(process, "platform", originalPlatformDescriptor); } } }); it("carries SIGTERM restart intent reason into launchd supervised handoff", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockReturnValueOnce({ reason: "gateway.restart" }); try { setPlatform("darwin"); process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway"; restartGatewayProcessWithFreshPid.mockReturnValueOnce({ mode: "supervised", }); await withIsolatedSignals(async ({ captureSignal }) => { const { exited } = await createSignaledLoopHarness(); const sigterm = captureSignal("SIGTERM"); vi.useFakeTimers(); sigterm(); await vi.advanceTimersByTimeAsync(1500); await expect(exited).resolves.toBe(0); expectRestartHandoffCall({ restartKind: "full-process", reason: "gateway.restart", supervisorMode: "launchd", }); }); } finally { vi.useRealTimers(); delete process.env.OPENCLAW_LAUNCHD_LABEL; if (originalPlatformDescriptor) { Object.defineProperty(process, "platform", originalPlatformDescriptor); } } }); it("forwards lockPort to initial and restart lock acquisitions", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); await withIsolatedSignals(async ({ captureSignal }) => { const closeFirst = vi.fn(async () => {}); const closeSecond = vi.fn(async () => {}); const closeThird = vi.fn(async () => {}); const { runtime, exited } = createRuntimeWithExitSignal(); const start = vi .fn() .mockResolvedValueOnce({ close: closeFirst }) .mockResolvedValueOnce({ close: closeSecond }) .mockResolvedValueOnce({ close: closeThird }); const { runGatewayLoop } = await import("./run-loop.js"); void runGatewayLoop({ start: start as unknown as Parameters[0]["start"], runtime: runtime as unknown as Parameters[0]["runtime"], lockPort: 18789, }); await new Promise((resolve) => { setImmediate(resolve); }); const sigusr1 = captureSignal("SIGUSR1"); const sigterm = captureSignal("SIGTERM"); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); expect(acquireGatewayLock).toHaveBeenNthCalledWith(1, { port: 18789 }); expect(acquireGatewayLock).toHaveBeenNthCalledWith(2, { port: 18789 }); expect(acquireGatewayLock).toHaveBeenNthCalledWith(3, { port: 18789 }); sigterm(); await expect(exited).resolves.toBe(0); }); }); it("exits when lock reacquire fails during in-process restart fallback", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue(undefined); await withIsolatedSignals(async ({ captureSignal }) => { const lockRelease = vi.fn(async () => {}); acquireGatewayLock .mockResolvedValueOnce({ release: lockRelease, }) .mockRejectedValueOnce(new Error("lock timeout")); restartGatewayProcessWithFreshPid.mockReturnValueOnce({ mode: "disabled", }); const { start, exited } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); sigusr1(); await expect(exited).resolves.toBe(1); expect(acquireGatewayLock).toHaveBeenCalledTimes(2); expect(start).toHaveBeenCalledTimes(1); expect(gatewayLog.error).toHaveBeenCalledWith( "failed to reacquire gateway lock for in-process restart: Error: lock timeout", ); }); }); it("hard-respawns update restarts and exits only after the replacement becomes healthy", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue("update.run"); respawnGatewayProcessForUpdate.mockReturnValueOnce({ mode: "spawned", pid: 7777, child: { kill: vi.fn() }, }); await withIsolatedSignals(async ({ captureSignal }) => { const waitForHealthyChild = vi.fn(async () => true); const close = vi.fn(async () => {}); const { start, started } = createSignaledStart(close); const { runtime, exited } = createRuntimeWithExitSignal(); await runLoopWithStart({ start, runtime, lockPort: 18789, waitForHealthyChild }); await waitForStart(started); const sigusr1 = captureSignal("SIGUSR1"); sigusr1(); await expect(exited).resolves.toBe(0); expect(waitForHealthyChild).toHaveBeenCalledWith(18789, 7777, "127.0.0.1"); expect(respawnGatewayProcessForUpdate).toHaveBeenCalledTimes(1); expect(start).toHaveBeenCalledTimes(1); expect(markUpdateRestartSentinelFailure).not.toHaveBeenCalled(); expect(writeGatewayRestartHandoffSync).not.toHaveBeenCalled(); }); }); it("writes a handoff before exiting for supervised update restarts", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue("update.run"); respawnGatewayProcessForUpdate.mockReturnValueOnce({ mode: "supervised", }); try { setPlatform("freebsd"); await withIsolatedSignals(async ({ captureSignal }) => { const { runtime, exited } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); sigusr1(); await expect(exited).resolves.toBe(0); expect(runtime.exit).toHaveBeenCalledWith(0); expectRestartHandoffCall({ restartKind: "update-process", reason: "update.run", supervisorMode: "external", }); }); } finally { if (originalPlatformDescriptor) { Object.defineProperty(process, "platform", originalPlatformDescriptor); } } }); it("probes the configured gateway host for update respawn health", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue("update.run"); respawnGatewayProcessForUpdate.mockReturnValueOnce({ mode: "spawned", pid: 7778, child: { kill: vi.fn() }, }); await withIsolatedSignals(async ({ captureSignal }) => { const waitForHealthyChild = vi.fn(async () => true); const close = vi.fn(async () => {}); const { start, started } = createSignaledStart(close); const { runtime, exited } = createRuntimeWithExitSignal(); await runLoopWithStart({ start, runtime, lockPort: 18789, healthHost: "10.0.0.25", waitForHealthyChild, }); await waitForStart(started); const sigusr1 = captureSignal("SIGUSR1"); sigusr1(); await expect(exited).resolves.toBe(0); expect(waitForHealthyChild).toHaveBeenCalledWith(18789, 7778, "10.0.0.25"); }); }); it("marks update respawn failures and falls back to in-process restart", async () => { vi.clearAllMocks(); peekGatewaySigusr1RestartReason.mockReturnValue("update.run"); const kill = vi.fn(); respawnGatewayProcessForUpdate.mockReturnValueOnce({ mode: "spawned", pid: 8888, child: { kill }, }); await withIsolatedSignals(async ({ captureSignal }) => { const waitForHealthyChild = vi.fn(async () => false); const closeFirst = vi.fn(async () => {}); const closeSecond = vi.fn(async () => {}); const { runtime, exited } = createRuntimeWithExitSignal(); const start = vi .fn() .mockResolvedValueOnce({ close: closeFirst }) .mockResolvedValueOnce({ close: closeSecond }); await runLoopWithStart({ start, runtime, lockPort: 18789, waitForHealthyChild }); await new Promise((resolve) => { setImmediate(resolve); }); const sigusr1 = captureSignal("SIGUSR1"); const sigterm = captureSignal("SIGTERM"); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); expect(waitForHealthyChild).toHaveBeenCalledWith(18789, 8888, "127.0.0.1"); expect(kill).toHaveBeenCalledTimes(1); expect(markUpdateRestartSentinelFailure).toHaveBeenCalledWith("restart-unhealthy"); expect(start).toHaveBeenCalledTimes(2); sigterm(); await expect(exited).resolves.toBe(0); }); }); it("catches SIGTERM handler errors, logs them, and falls back to stop (#83131)", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockImplementationOnce(() => { throw new Error("dynamic import failed"); }); await withIsolatedSignals(async ({ captureSignal }) => { const { close, runtime, exited } = await createSignaledLoopHarness(); const sigterm = captureSignal("SIGTERM"); sigterm(); await expect(exited).resolves.toBe(0); expect(gatewayLog.error).toHaveBeenCalledWith( "failed to handle SIGTERM: Error: dynamic import failed", ); expect(close).toHaveBeenCalledWith({ reason: "gateway stopping", restartExpectedMs: null, }); expect(runtime.exit).toHaveBeenCalledWith(0); }); }); it("catches SIGUSR1 handler errors even when token cleanup throws (#83131)", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockImplementationOnce(() => { throw new Error("lifecycle module corrupted"); }); markGatewaySigusr1RestartHandled.mockImplementationOnce(() => { throw new Error("recovery import also failed"); }); await withIsolatedSignals(async ({ captureSignal }) => { const { close, start, exited } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); const sigterm = captureSignal("SIGTERM"); sigusr1(); await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(gatewayLog.error).toHaveBeenCalledWith( "SIGUSR1 handler failed: lifecycle module corrupted", ); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalled(); expect(close).not.toHaveBeenCalled(); expect(start).toHaveBeenCalledTimes(1); sigterm(); await expect(exited).resolves.toBe(0); }); }); it("catches SIGUSR1 handler errors, clears restart token, and does not crash (#83131)", async () => { vi.clearAllMocks(); consumeGatewayRestartIntentPayloadSync.mockImplementationOnce(() => { throw new Error("sigusr1 lifecycle import failed"); }); await withIsolatedSignals(async ({ captureSignal }) => { const { close, start, exited } = await createSignaledLoopHarness(); const sigusr1 = captureSignal("SIGUSR1"); const sigterm = captureSignal("SIGTERM"); sigusr1(); // The catch handler clears the restart token from the eagerly-loaded // lifecycle runtime, so wait for the async signal body to reject. await new Promise((resolve) => { setImmediate(resolve); }); await new Promise((resolve) => { setImmediate(resolve); }); expect(gatewayLog.error).toHaveBeenCalledWith( "SIGUSR1 handler failed: sigusr1 lifecycle import failed", ); // Restart token must be cleared so future SIGUSR1 restarts are not // permanently coalesced as "already in-flight". expect(markGatewaySigusr1RestartHandled).toHaveBeenCalled(); expect(close).not.toHaveBeenCalled(); expect(start).toHaveBeenCalledTimes(1); sigterm(); await expect(exited).resolves.toBe(0); }); }); }); describe("gateway discover routing helpers", () => { it("prefers resolved service host over TXT hints", () => { const beacon: GatewayBonjourBeacon = { instanceName: "Test", host: "10.0.0.2", port: 18789, lanHost: "evil.example.com", tailnetDns: "evil.example.com", }; expect(pickBeaconHost(beacon)).toBe("10.0.0.2"); }); it("prefers resolved service port over TXT gatewayPort", () => { const beacon: GatewayBonjourBeacon = { instanceName: "Test", host: "10.0.0.2", port: 18789, gatewayPort: 12345, }; expect(pickGatewayPort(beacon)).toBe(18789); }); it("fails closed when resolve data is missing", () => { const beacon: GatewayBonjourBeacon = { instanceName: "Test", lanHost: "test-host.local", gatewayPort: 18789, }; expect(pickBeaconHost(beacon)).toBeNull(); expect(pickGatewayPort(beacon)).toBeNull(); }); });