From 2cf9ed782d9a2cb07986ad2facc71e2a51fbdca4 Mon Sep 17 00:00:00 2001 From: neo1027144 Date: Fri, 10 Apr 2026 23:23:46 +0800 Subject: [PATCH] fix(daemon): prevent systemd restart storm on config validation failure Exit gateway configuration failures with EX_CONFIG and teach generated systemd units not to restart on that exit status. Co-authored-by: neo1027144-creator --- CHANGELOG.md | 1 + src/cli/gateway-cli/run.option-collisions.test.ts | 2 +- src/cli/gateway-cli/run.ts | 13 ++++++++++--- src/daemon/systemd-unit.test.ts | 3 +++ src/daemon/systemd-unit.ts | 3 +++ 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 04884bab1eb..4ba63d7e469 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -108,6 +108,7 @@ Docs: https://docs.openclaw.ai - Config/plugins: use plugin-owned command alias metadata when `plugins.allow` contains runtime command names like `dreaming`, and point users at the owning plugin instead of stale plugin-not-found guidance. (#64242) Thanks @feiskyer. - Agents/Gemini: strip orphaned `required` entries from Gemini tool schemas so provider validation no longer rejects tools after schema cleanup or union flattening. (#64284) Thanks @xxxxxmax. - Assistant text: strip Qwen-style XML tool call payloads from visible replies so web and channel messages no longer show raw `` output. (#64214) Thanks @MoerAI. +- Daemon/gateway: prevent systemd restart storms on configuration errors by exiting with `EX_CONFIG` and adding generated unit restart-prevention guards. (#63913) Thanks @neo1027144-creator. 
## 2026.4.9 diff --git a/src/cli/gateway-cli/run.option-collisions.test.ts b/src/cli/gateway-cli/run.option-collisions.test.ts index 59159538132..29bba16aacd 100644 --- a/src/cli/gateway-cli/run.option-collisions.test.ts +++ b/src/cli/gateway-cli/run.option-collisions.test.ts @@ -247,7 +247,7 @@ describe("gateway run option collisions", () => { }, }; - await expect(runGatewayCli(["gateway", "run"])).rejects.toThrow("__exit__:1"); + await expect(runGatewayCli(["gateway", "run"])).rejects.toThrow("__exit__:78"); expect(runtimeErrors).toContain( "Gateway start blocked: existing config is missing gateway.mode. Treat this as suspicious or clobbered config. Re-run `openclaw onboard --mode local` or `openclaw setup`, set gateway.mode=local manually, or pass --allow-unconfigured.", diff --git a/src/cli/gateway-cli/run.ts b/src/cli/gateway-cli/run.ts index e0a86561be8..bcf23811923 100644 --- a/src/cli/gateway-cli/run.ts +++ b/src/cli/gateway-cli/run.ts @@ -97,6 +97,13 @@ const GATEWAY_RUN_BOOLEAN_KEYS = [ const SUPERVISED_GATEWAY_LOCK_RETRY_MS = 5000; +/** + * EX_CONFIG (78) from sysexits.h — used for configuration errors so systemd + * (via RestartPreventExitStatus=78) stops restarting instead of entering a + * restart storm that can render low-resource hosts unresponsive. 
+ */ +const EXIT_CONFIG_ERROR = 78; + const GATEWAY_AUTH_MODES: readonly GatewayAuthMode[] = [ "none", "token", @@ -429,7 +436,7 @@ async function runGatewayCommand(opts: GatewayRunOpts) { for (const error of guardErrors) { defaultRuntime.error(error); } - defaultRuntime.exit(1); + defaultRuntime.exit(EXIT_CONFIG_ERROR); return; } const miskeys = extractGatewayMiskeys(snapshot?.parsed); @@ -487,7 +494,7 @@ async function runGatewayCommand(opts: GatewayRunOpts) { .filter(Boolean) .join("\n"), ); - defaultRuntime.exit(1); + defaultRuntime.exit(EXIT_CONFIG_ERROR); return; } if (resolvedAuthMode === "none") { @@ -517,7 +524,7 @@ async function runGatewayCommand(opts: GatewayRunOpts) { .filter(Boolean) .join("\n"), ); - defaultRuntime.exit(1); + defaultRuntime.exit(EXIT_CONFIG_ERROR); return; } const tailscaleOverride = diff --git a/src/daemon/systemd-unit.test.ts b/src/daemon/systemd-unit.test.ts index 0a94a1c6b4b..9c8a759bc92 100644 --- a/src/daemon/systemd-unit.test.ts +++ b/src/daemon/systemd-unit.test.ts @@ -22,6 +22,9 @@ describe("buildSystemdUnit", () => { expect(unit).toContain("TimeoutStopSec=30"); expect(unit).toContain("TimeoutStartSec=30"); expect(unit).toContain("SuccessExitStatus=0 143"); + expect(unit).toContain("StartLimitBurst=5"); + expect(unit).toContain("StartLimitIntervalSec=60"); + expect(unit).toContain("RestartPreventExitStatus=78"); }); it("rejects environment values with line breaks", () => { diff --git a/src/daemon/systemd-unit.ts b/src/daemon/systemd-unit.ts index 0d2d44715f4..d1ac77c1afa 100644 --- a/src/daemon/systemd-unit.ts +++ b/src/daemon/systemd-unit.ts @@ -54,11 +54,14 @@ export function buildSystemdUnit({ descriptionLine, "After=network-online.target", "Wants=network-online.target", + "StartLimitBurst=5", + "StartLimitIntervalSec=60", "", "[Service]", `ExecStart=${execStart}`, "Restart=always", "RestartSec=5", + "RestartPreventExitStatus=78", "TimeoutStopSec=30", "TimeoutStartSec=30", "SuccessExitStatus=0 143",