mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-17 21:10:54 +00:00
Daemon: back off unhealthy gateway restarts
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
import fs from "node:fs/promises";
|
||||
|
||||
// launchd applies ThrottleInterval to any rapid relaunch, including
|
||||
// intentional gateway restarts. Keep it low so CLI restarts and forced
|
||||
// reinstalls do not stall for a full minute.
|
||||
export const LAUNCH_AGENT_THROTTLE_INTERVAL_SECONDS = 1;
|
||||
// launchd applies ThrottleInterval to any rapid relaunch, including config-crash
|
||||
// loops. Intentional gateway restarts use launchctl kickstart, so a higher value
|
||||
// here primarily slows unhealthy restart storms without making operator restarts sluggish.
|
||||
export const LAUNCH_AGENT_THROTTLE_INTERVAL_SECONDS = 30;
|
||||
// launchd stores plist integer values in decimal; 0o077 renders as 63 (owner-only files).
|
||||
export const LAUNCH_AGENT_UMASK_DECIMAL = 0o077;
|
||||
|
||||
@@ -113,5 +113,5 @@ export function buildLaunchAgentPlist({
|
||||
? `\n <key>Comment</key>\n <string>${plistEscape(comment.trim())}</string>`
|
||||
: "";
|
||||
const envXml = renderEnvDict(environment);
|
||||
return `<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n<plist version="1.0">\n <dict>\n <key>Label</key>\n <string>${plistEscape(label)}</string>\n ${commentXml}\n <key>RunAtLoad</key>\n <true/>\n <key>KeepAlive</key>\n <true/>\n <key>ThrottleInterval</key>\n <integer>${LAUNCH_AGENT_THROTTLE_INTERVAL_SECONDS}</integer>\n <key>Umask</key>\n <integer>${LAUNCH_AGENT_UMASK_DECIMAL}</integer>\n <key>ProgramArguments</key>\n <array>${argsXml}\n </array>\n ${workingDirXml}\n <key>StandardOutPath</key>\n <string>${plistEscape(stdoutPath)}</string>\n <key>StandardErrorPath</key>\n <string>${plistEscape(stderrPath)}</string>${envXml}\n </dict>\n</plist>\n`;
|
||||
return `<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n<plist version="1.0">\n <dict>\n <key>Label</key>\n <string>${plistEscape(label)}</string>\n ${commentXml}\n <key>RunAtLoad</key>\n <true/>\n <key>KeepAlive</key>\n <dict>\n <key>SuccessfulExit</key>\n <false/>\n </dict>\n <key>ThrottleInterval</key>\n <integer>${LAUNCH_AGENT_THROTTLE_INTERVAL_SECONDS}</integer>\n <key>Umask</key>\n <integer>${LAUNCH_AGENT_UMASK_DECIMAL}</integer>\n <key>ProgramArguments</key>\n <array>${argsXml}\n </array>\n ${workingDirXml}\n <key>StandardOutPath</key>\n <string>${plistEscape(stdoutPath)}</string>\n <key>StandardErrorPath</key>\n <string>${plistEscape(stderrPath)}</string>${envXml}\n </dict>\n</plist>\n`;
|
||||
}
|
||||
|
||||
@@ -189,7 +189,7 @@ describe("launchd install", () => {
|
||||
expect(plist).toContain(`<string>${tmpDir}</string>`);
|
||||
});
|
||||
|
||||
it("writes KeepAlive=true policy with restrictive umask", async () => {
|
||||
it("writes KeepAlive restart-on-failure policy with restrictive umask", async () => {
|
||||
const env = createDefaultLaunchdEnv();
|
||||
await installLaunchAgent({
|
||||
env,
|
||||
@@ -200,8 +200,9 @@ describe("launchd install", () => {
|
||||
const plistPath = resolveLaunchAgentPlistPath(env);
|
||||
const plist = state.files.get(plistPath) ?? "";
|
||||
expect(plist).toContain("<key>KeepAlive</key>");
|
||||
expect(plist).toContain("<true/>");
|
||||
expect(plist).not.toContain("<key>SuccessfulExit</key>");
|
||||
expect(plist).toContain("<dict>");
|
||||
expect(plist).toContain("<key>SuccessfulExit</key>");
|
||||
expect(plist).toContain("<false/>");
|
||||
expect(plist).toContain("<key>Umask</key>");
|
||||
expect(plist).toContain(`<integer>${LAUNCH_AGENT_UMASK_DECIMAL}</integer>`);
|
||||
expect(plist).toContain("<key>ThrottleInterval</key>");
|
||||
|
||||
@@ -171,7 +171,10 @@ async function auditLaunchdPlist(
|
||||
}
|
||||
|
||||
const hasRunAtLoad = /<key>RunAtLoad<\/key>\s*<true\s*\/>/i.test(content);
|
||||
const hasKeepAlive = /<key>KeepAlive<\/key>\s*<true\s*\/>/i.test(content);
|
||||
const hasKeepAlive =
|
||||
/<key>KeepAlive<\/key>\s*(?:<true\s*\/>|<dict>[\s\S]*?<key>SuccessfulExit<\/key>\s*<false\s*\/>[\s\S]*?<\/dict>)/i.test(
|
||||
content,
|
||||
);
|
||||
if (!hasRunAtLoad) {
|
||||
issues.push({
|
||||
code: SERVICE_AUDIT_CODES.launchdRunAtLoad,
|
||||
|
||||
@@ -21,6 +21,16 @@ describe("buildSystemdUnit", () => {
|
||||
expect(unit).toContain("KillMode=control-group");
|
||||
});
|
||||
|
||||
// Checks the restart policy line in the unit text returned by buildSystemdUnit
// when called with a description, three program arguments and an empty environment.
it("restarts only on failure", () => {
|
||||
const unit = buildSystemdUnit({
|
||||
description: "OpenClaw Gateway",
|
||||
programArguments: ["/usr/bin/openclaw", "gateway", "run"],
|
||||
environment: {},
|
||||
});
|
||||
// The on-failure policy must be present in the rendered unit...
expect(unit).toContain("Restart=on-failure");
|
||||
// ...and the unconditional restart policy must not appear anywhere in it.
expect(unit).not.toContain("Restart=always");
|
||||
});
|
||||
|
||||
it("rejects environment values with line breaks", () => {
|
||||
expect(() =>
|
||||
buildSystemdUnit({
|
||||
|
||||
@@ -57,7 +57,7 @@ export function buildSystemdUnit({
|
||||
"",
|
||||
"[Service]",
|
||||
`ExecStart=${execStart}`,
|
||||
"Restart=always",
|
||||
"Restart=on-failure",
|
||||
"RestartSec=5",
|
||||
// Keep service children in the same lifecycle so restarts do not leave
|
||||
// orphan ACP/runtime workers behind.
|
||||
|
||||
Reference in New Issue
Block a user