mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 14:30:45 +00:00
fix: repair stale gateway service on start
This commit is contained in:
@@ -31,7 +31,7 @@ import {
|
||||
} from "./shared.js";
|
||||
import type { DaemonInstallOptions } from "./types.js";
|
||||
|
||||
function mergeInstallInvocationEnv(params: {
|
||||
export function mergeInstallInvocationEnv(params: {
|
||||
env: NodeJS.ProcessEnv;
|
||||
existingServiceEnv?: Record<string, string>;
|
||||
}): NodeJS.ProcessEnv {
|
||||
|
||||
@@ -353,6 +353,55 @@ describe("runServiceRestart token drift", () => {
|
||||
expect(payload.message).toBe("restart scheduled, gateway will restart momentarily");
|
||||
});
|
||||
|
||||
// A loaded service whose recorded OPENCLAW_SERVICE_VERSION is older than the CLI
// (presumably what drives the start path into its repair branch — confirm against
// startGatewayService) must be handed to `repairLoadedService`, and the callback's
// result must be what the JSON payload reports. No plain restart may happen.
it("repairs stale loaded services during start before reporting success", async () => {
  const staleCommand = {
    programArguments: ["openclaw", "gateway"],
    environment: { OPENCLAW_SERVICE_VERSION: "2026.4.24" },
  };
  service.readCommand.mockResolvedValue(staleCommand);

  // Stand-in repair callback that reports a successful repair plus one warning.
  const repairStub = vi.fn(async () => ({
    result: "started" as const,
    message: "Gateway service definition repaired and started.",
    warnings: ["service was installed by OpenClaw 2026.4.24, current CLI is 2026.5.2"],
    loaded: true,
  }));

  await runServiceStart({
    serviceNoun: "Gateway",
    service,
    renderStartHints: () => [],
    opts: { json: true },
    repairLoadedService: repairStub,
  });

  // The repair path ran exactly once and replaced the ordinary restart.
  expect(repairStub).toHaveBeenCalledTimes(1);
  expect(service.restart).not.toHaveBeenCalled();

  // The emitted JSON mirrors what the repair callback returned.
  const emitted = readJsonLog<{
    result?: string;
    message?: string;
    warnings?: string[];
    service?: { loaded?: boolean };
  }>();
  expect(emitted.result).toBe("started");
  expect(emitted.message).toBe("Gateway service definition repaired and started.");
  expect(emitted.warnings?.[0]).toContain("service was installed by OpenClaw");
  expect(emitted.service?.loaded).toBe(true);
});
|
||||
|
||||
// Without a `repairLoadedService` callback, a stale loaded service must make start
// fail (exit code 1) with a "needs repair" error and a forced-reinstall hint,
// and must not fall back to a plain restart.
it("fails start with an install hint when a stale loaded service has no repair callback", async () => {
  const staleCommand = {
    programArguments: ["openclaw", "gateway"],
    environment: { OPENCLAW_SERVICE_VERSION: "2026.4.24" },
  };
  service.readCommand.mockResolvedValue(staleCommand);

  // The test harness surfaces process exit as a thrown "__exit__:<code>" error.
  const startAttempt = runServiceStart(createServiceRunArgs());
  await expect(startAttempt).rejects.toThrow("__exit__:1");

  const failure = readJsonLog<{ ok?: boolean; error?: string; hints?: string[] }>();
  expect(failure.ok).toBe(false);
  expect(failure.error).toContain("service needs repair");
  expect(failure.hints).toEqual(["openclaw gateway install --force"]);
  expect(service.restart).not.toHaveBeenCalled();
});
|
||||
|
||||
it("fails start when restarting a stopped installed service errors", async () => {
|
||||
service.isLoaded.mockResolvedValue(false);
|
||||
service.restart.mockRejectedValue(new Error("launchctl kickstart failed: permission denied"));
|
||||
|
||||
@@ -5,6 +5,7 @@ import { formatConfigIssueLines } from "../../config/issue-format.js";
|
||||
import { resolveIsNixMode } from "../../config/paths.js";
|
||||
import { checkTokenDrift } from "../../daemon/service-audit.js";
|
||||
import type { GatewayServiceRestartResult } from "../../daemon/service-types.js";
|
||||
import type { GatewayServiceStartRepairIssue, GatewayServiceState } from "../../daemon/service.js";
|
||||
import { describeGatewayServiceRestart, startGatewayService } from "../../daemon/service.js";
|
||||
import type { GatewayService } from "../../daemon/service.js";
|
||||
import { renderSystemdUnavailableHints } from "../../daemon/systemd-hints.js";
|
||||
@@ -16,6 +17,7 @@ import {
|
||||
} from "../../infra/restart.js";
|
||||
import { isWSL } from "../../infra/wsl.js";
|
||||
import { defaultRuntime } from "../../runtime.js";
|
||||
import { formatCliCommand } from "../command-format.js";
|
||||
import { resolveGatewayTokenForDriftCheck } from "./gateway-token-drift.js";
|
||||
import {
|
||||
buildDaemonServiceSnapshot,
|
||||
@@ -48,6 +50,11 @@ type ServiceRecoveryContext = {
|
||||
fail: (message: string, hints?: string[]) => void;
|
||||
};
|
||||
|
||||
/**
 * Context passed to a `repairLoadedService` callback: everything in
 * {@link ServiceRecoveryContext} plus the details of why the start needs a repair.
 */
type ServiceStartRepairContext = ServiceRecoveryContext & {
  /** Repair issues reported by the start attempt. */
  issues: GatewayServiceStartRepairIssue[];
  /** Service state reported by the same start attempt. */
  state: GatewayServiceState;
};
|
||||
|
||||
async function maybeAugmentSystemdHints(hints: string[]): Promise<string[]> {
|
||||
if (process.platform !== "linux") {
|
||||
return hints;
|
||||
@@ -221,6 +228,7 @@ export async function runServiceStart(params: {
|
||||
renderStartHints: () => string[];
|
||||
opts?: DaemonLifecycleOptions;
|
||||
onNotLoaded?: (ctx: ServiceRecoveryContext) => Promise<ServiceRecoveryResult | null>;
|
||||
repairLoadedService?: (ctx: ServiceStartRepairContext) => Promise<ServiceRecoveryResult | null>;
|
||||
}) {
|
||||
const json = Boolean(params.opts?.json);
|
||||
const { stdout, emit, fail } = createDaemonActionContext({ action: "start", json });
|
||||
@@ -298,6 +306,41 @@ export async function runServiceStart(params: {
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (startResult.outcome === "repair-required") {
|
||||
try {
|
||||
const handled = await params.repairLoadedService?.({
|
||||
json,
|
||||
stdout,
|
||||
fail,
|
||||
state: startResult.state,
|
||||
issues: startResult.issues,
|
||||
});
|
||||
if (handled) {
|
||||
emit({
|
||||
ok: true,
|
||||
result: handled.result,
|
||||
message: handled.message,
|
||||
warnings: handled.warnings,
|
||||
service: buildDaemonServiceSnapshot(params.service, handled.loaded ?? true),
|
||||
});
|
||||
if (!json && handled.message) {
|
||||
defaultRuntime.log(handled.message);
|
||||
}
|
||||
return;
|
||||
}
|
||||
} catch (err) {
|
||||
const hints = params.renderStartHints();
|
||||
fail(`${params.serviceNoun} repair failed: ${String(err)}`, hints);
|
||||
return;
|
||||
}
|
||||
fail(
|
||||
`${params.serviceNoun} service needs repair before it can start: ${startResult.issues
|
||||
.map((issue) => issue.message)
|
||||
.join("; ")}`,
|
||||
[formatCliCommand("openclaw gateway install --force")],
|
||||
);
|
||||
return;
|
||||
}
|
||||
emit({
|
||||
ok: true,
|
||||
result: "started",
|
||||
|
||||
@@ -52,6 +52,7 @@ const probeGateway = vi.fn<
|
||||
const isRestartEnabled = vi.fn<(config?: { commands?: unknown }) => boolean>(() => true);
|
||||
const loadConfig = vi.hoisted(() => vi.fn(() => ({})));
|
||||
const recoverInstalledLaunchAgent = vi.hoisted(() => vi.fn());
|
||||
const repairLoadedGatewayServiceForStart = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("../../config/config.js", () => ({
|
||||
getRuntimeConfig: () => loadConfig(),
|
||||
@@ -89,6 +90,10 @@ vi.mock("./launchd-recovery.js", () => ({
|
||||
recoverInstalledLaunchAgent(args),
|
||||
}));
|
||||
|
||||
vi.mock("./start-repair.js", () => ({
|
||||
repairLoadedGatewayServiceForStart: (args: unknown) => repairLoadedGatewayServiceForStart(args),
|
||||
}));
|
||||
|
||||
vi.mock("./restart-health.js", () => ({
|
||||
DEFAULT_RESTART_HEALTH_ATTEMPTS: 120,
|
||||
DEFAULT_RESTART_HEALTH_DELAY_MS: 500,
|
||||
@@ -160,6 +165,7 @@ describe("runDaemonRestart health checks", () => {
|
||||
isRestartEnabled.mockReset();
|
||||
loadConfig.mockReset();
|
||||
recoverInstalledLaunchAgent.mockReset();
|
||||
repairLoadedGatewayServiceForStart.mockReset();
|
||||
|
||||
service.readCommand.mockResolvedValue({
|
||||
programArguments: ["openclaw", "gateway", "--port", "18789"],
|
||||
@@ -224,6 +230,46 @@ describe("runDaemonRestart health checks", () => {
|
||||
expect(recoverInstalledLaunchAgent).toHaveBeenCalledWith({ result: "started" });
|
||||
});
|
||||
|
||||
// Verifies the wiring in runDaemonStart: the `repairLoadedService` callback it hands
// to runServiceStart must forward the context (plus the resolved service) to
// repairLoadedGatewayServiceForStart.
it("repairs stale loaded service definitions from gateway start", async () => {
  type RepairCallbackArgs = {
    json: boolean;
    stdout: NodeJS.WritableStream;
    state: unknown;
    issues: unknown[];
  };
  type StartParams = {
    repairLoadedService?: (args: RepairCallbackArgs) => Promise<unknown>;
  };

  repairLoadedGatewayServiceForStart.mockResolvedValue({
    result: "started",
    message: "Gateway service definition repaired and started.",
    loaded: true,
  });

  // Fake start flow: immediately invoke the repair callback with a stale-service context.
  runServiceStart.mockImplementation(async (params: StartParams) => {
    const staleContext: RepairCallbackArgs = {
      json: true,
      stdout: process.stdout,
      state: { command: { environment: { OPENCLAW_SERVICE_VERSION: "2026.4.24" } } },
      issues: [{ code: "version-mismatch", message: "old service" }],
    };
    await params.repairLoadedService?.(staleContext);
  });

  await runDaemonStart({ json: true });

  const expectedState = expect.objectContaining({
    command: expect.objectContaining({
      environment: { OPENCLAW_SERVICE_VERSION: "2026.4.24" },
    }),
  });
  const expectedIssues = [expect.objectContaining({ code: "version-mismatch" })];

  expect(repairLoadedGatewayServiceForStart).toHaveBeenCalledWith(
    expect.objectContaining({
      service,
      json: true,
      state: expectedState,
      issues: expectedIssues,
    }),
  );
});
|
||||
|
||||
it("kills stale gateway pids and retries restart", async () => {
|
||||
const unhealthy: RestartHealthSnapshot = {
|
||||
healthy: false,
|
||||
|
||||
@@ -29,6 +29,7 @@ import {
|
||||
waitForGatewayHealthyRestart,
|
||||
} from "./restart-health.js";
|
||||
import { parsePortFromArgs, renderGatewayServiceStartHints } from "./shared.js";
|
||||
import { repairLoadedGatewayServiceForStart } from "./start-repair.js";
|
||||
import type { DaemonLifecycleOptions } from "./types.js";
|
||||
|
||||
const POST_RESTART_HEALTH_ATTEMPTS = DEFAULT_RESTART_HEALTH_ATTEMPTS;
|
||||
@@ -150,14 +151,23 @@ export async function runDaemonUninstall(opts: DaemonLifecycleOptions = {}) {
|
||||
}
|
||||
|
||||
/**
 * Starts the gateway service through the shared `runServiceStart` flow.
 *
 * - On macOS (`process.platform === "darwin"`) a not-loaded service is handed to
 *   `recoverInstalledLaunchAgent`; on other platforms no `onNotLoaded` hook is set.
 * - A loaded service that needs repair is delegated to
 *   `repairLoadedGatewayServiceForStart`, using the same service handle.
 *
 * @param opts Lifecycle options forwarded unchanged to `runServiceStart`.
 * @returns Whatever `runServiceStart` resolves to.
 */
export async function runDaemonStart(opts: DaemonLifecycleOptions = {}) {
  // Resolve once so the start flow and the repair callback operate on the same
  // service handle. The `service` key must appear only once in the object literal:
  // a second `service: resolveGatewayService()` entry is a duplicate property
  // (TS1117) and would resolve the service a second time.
  const service = resolveGatewayService();
  return await runServiceStart({
    serviceNoun: "Gateway",
    service,
    renderStartHints: renderGatewayServiceStartHints,
    onNotLoaded:
      process.platform === "darwin"
        ? async () => await recoverInstalledLaunchAgent({ result: "started" })
        : undefined,
    repairLoadedService: async ({ json, stdout, state, issues }) =>
      await repairLoadedGatewayServiceForStart({
        service,
        json,
        stdout,
        state,
        issues,
      }),
    opts,
  });
}
|
||||
|
||||
94
src/cli/daemon-cli/start-repair.ts
Normal file
94
src/cli/daemon-cli/start-repair.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
import { buildGatewayInstallPlan } from "../../commands/daemon-install-helpers.js";
|
||||
import { DEFAULT_GATEWAY_DAEMON_RUNTIME } from "../../commands/daemon-runtime.js";
|
||||
import { resolveGatewayInstallToken } from "../../commands/gateway-install-token.js";
|
||||
import { readConfigFileSnapshotForWrite } from "../../config/io.js";
|
||||
import { resolveGatewayPort } from "../../config/paths.js";
|
||||
import { OPENCLAW_WRAPPER_ENV_KEY, resolveOpenClawWrapperPath } from "../../daemon/program-args.js";
|
||||
import type { GatewayServiceEnv } from "../../daemon/service-types.js";
|
||||
import type {
|
||||
GatewayService,
|
||||
GatewayServiceStartRepairIssue,
|
||||
GatewayServiceState,
|
||||
} from "../../daemon/service.js";
|
||||
import { formatGatewayServiceStartRepairIssues } from "../../daemon/service.js";
|
||||
import { defaultRuntime } from "../../runtime.js";
|
||||
import { mergeInstallInvocationEnv } from "./install.js";
|
||||
|
||||
/**
 * Rebuilds the gateway service definition from the current config and environment
 * and reinstalls it through `params.service.install`.
 *
 * Steps, in order: read the config snapshot, merge the invoking environment with the
 * environment recorded on the existing service command, resolve the wrapper path,
 * port and install token, build a fresh install plan, install it, then probe whether
 * the service is loaded.
 *
 * @param params.service Service backend used to install and probe the definition.
 * @param params.state   Current service state; only `command.environment` is read here.
 * @param params.issues  Repair issues, formatted into the first warning line.
 * @param params.json    When true, nothing is written to the runtime log.
 * @param params.stdout  Stream handed to `service.install`.
 * @returns A "started" result with the collected warnings (omitted when empty) and the
 *   loaded flag (reported as `true` if the probe itself throws).
 * @throws Error carrying `unavailableReason` when no install token can be resolved.
 */
export async function repairLoadedGatewayServiceForStart(params: {
  service: GatewayService;
  state: GatewayServiceState;
  issues: GatewayServiceStartRepairIssue[];
  json: boolean;
  stdout: NodeJS.WritableStream;
}): Promise<{ result: "started"; message: string; warnings?: string[]; loaded: boolean }> {
  const { service, state, issues, json, stdout } = params;

  // Human-readable progress lines are suppressed in JSON mode.
  const report = (line: string): void => {
    if (!json) {
      defaultRuntime.log(line);
    }
  };

  const configRead = await readConfigFileSnapshotForWrite();
  const configSnapshot = configRead.snapshot;
  const configWriteOptions = configRead.writeOptions;
  // Use the source config when the snapshot validated, otherwise the snapshot's `config`.
  const cfg = configSnapshot.valid ? configSnapshot.sourceConfig : configSnapshot.config;

  const existingEnvironment = state.command?.environment;
  const installEnv = mergeInstallInvocationEnv({
    env: process.env,
    existingServiceEnv: existingEnvironment,
  });
  const wrapperPath = await resolveOpenClawWrapperPath(installEnv[OPENCLAW_WRAPPER_ENV_KEY]);
  const port = resolveGatewayPort(cfg);

  // A missing token is generated and persisted as part of resolution.
  const tokenResolution = await resolveGatewayInstallToken({
    config: cfg,
    configSnapshot,
    configWriteOptions,
    env: installEnv,
    autoGenerateWhenMissing: true,
    persistGeneratedToken: true,
  });
  if (tokenResolution.unavailableReason) {
    throw new Error(tokenResolution.unavailableReason);
  }

  // Issue summary first, then token warnings; blank entries are dropped.
  const warnings = [formatGatewayServiceStartRepairIssues(issues), ...tokenResolution.warnings].filter(
    (entry) => entry.trim() !== "",
  );
  report("Gateway service definition needs repair:");
  for (const entry of warnings) {
    report(`- ${entry}`);
  }

  const plan = await buildGatewayInstallPlan({
    env: installEnv,
    port,
    runtime: DEFAULT_GATEWAY_DAEMON_RUNTIME,
    wrapperPath,
    existingEnvironment,
    config: cfg,
    // Plan-time warnings are both collected for the result and echoed immediately.
    warn: (message) => {
      warnings.push(message);
      report(`- ${message}`);
    },
  });

  // NOTE(review): the result below says "started" although only install() is invoked
  // here — presumably install also (re)starts the service; confirm against the backends.
  await service.install({
    env: installEnv as GatewayServiceEnv,
    stdout,
    programArguments: plan.programArguments,
    workingDirectory: plan.workingDirectory,
    environment: plan.environment,
  });

  // Best-effort probe: a failing isLoaded() is treated as "loaded".
  const probeLoaded = async (): Promise<boolean> => {
    try {
      return await service.isLoaded({ env: installEnv });
    } catch {
      return true;
    }
  };
  const loaded = await probeLoaded();

  return {
    result: "started",
    message: "Gateway service definition repaired and started.",
    warnings: warnings.length > 0 ? warnings : undefined,
    loaded,
  };
}
|
||||
Reference in New Issue
Block a user