From b11933d8a9ea6e17947f5d37da02a68e91efde8b Mon Sep 17 00:00:00 2001 From: merlin Date: Thu, 5 Mar 2026 18:36:39 +0800 Subject: [PATCH] fix(gateway): catch startup failure in run loop to prevent process exit (#35862) When an in-process restart (SIGUSR1) is triggered by a config change and the new config is invalid, params.start() throws and the while loop exits, killing the process. On macOS the respawned process then loses its TCC permissions (e.g. Full Disk Access), because they are PID-bound. Wrap params.start() in try/catch: on failure, set server=null, log the error, and wait for the next SIGUSR1 instead of crashing. --- src/cli/gateway-cli/run-loop.ts | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/cli/gateway-cli/run-loop.ts b/src/cli/gateway-cli/run-loop.ts index 684e0a65c16..c076eac040f 100644 --- a/src/cli/gateway-cli/run-loop.ts +++ b/src/cli/gateway-cli/run-loop.ts @@ -193,7 +193,19 @@ export async function runGatewayLoop(params: { // eslint-disable-next-line no-constant-condition while (true) { onIteration(); - server = await params.start(); + try { + server = await params.start(); + } catch (err) { + // If startup fails (e.g., invalid config after a config-triggered + // restart), keep the process alive and wait for the next SIGUSR1 + // instead of crashing. A crash here would respawn a new process that + // loses macOS Full Disk Access (TCC permissions are PID-bound). (#35862) + server = null; + gatewayLog.error( + `gateway startup failed: ${err instanceof Error ? err.message : String(err)}. ` + + "Process will stay alive; fix the issue and restart.", + ); + } await new Promise((resolve) => { restartResolver = resolve; });