fix(doctor): skip restart prompt when gateway is healthy after recent restart (#86533)

* fix(doctor): skip restart prompt when gateway is healthy after recent restart

`openclaw doctor` unconditionally prompted "Restart gateway service now?"
with default=Yes whenever the gateway was running, even if it had just
restarted via SIGUSR1 after an update. This caused restart loops on macOS
where the prompt raced with launchctl KeepAlive.

Changes:
- Probe gateway health before the restart prompt when a restart handoff
  exists (deep doctor mode). If healthy, skip the prompt entirely.
- Change `initialValue` from `true` to `false` as a safety net so users
  don't accidentally confirm a restart by pressing Enter.
- Update existing test that expected a single `readGatewayRestartHandoffSync`
  call (now called twice: diagnostic display + health-probe check).

Fixes #86518

* fix(doctor): correct GatewayRestartHandoff mock types in tests

Add explicit literal types + satisfies constraint so the mock handoff
objects match the exact GatewayRestartHandoff type expected by the
type-check CI.

* fix(doctor): apply recent-restart skip to normal doctor flow

* test(doctor): align normal-flow handoff expectation

* chore: add doctor restart prompt changelog

---------

Co-authored-by: OpenClaw Contributor <openclaw-contributor@example.com>
Co-authored-by: liaoyl830 <267396060+liaoyl830@users.noreply.github.com>
Co-authored-by: sallyom <somalley@redhat.com>
This commit is contained in:
liaoyl830
2026-05-26 01:53:28 +08:00
committed by GitHub
parent 8129dba5d8
commit 6e8d2dbbbc
3 changed files with 90 additions and 4 deletions

View File

@@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
- Tests: clean successful plugin gateway gauntlet isolated temp roots while keeping an explicit preservation switch for failed/debug runs.
- Plugins/perf: reuse derived plugin metadata snapshots for the lifetime of the process so reply-time skill setup no longer rescans plugin metadata on every turn.
- Discord/OpenAI voice: keep wake-name master consults using the current speaker context after ignored ambient transcripts and shorten the default capture silence grace.
- Doctor: skip redundant Gateway restart prompts when a recent supervisor restart leaves the Gateway healthy. Fixes #86518. (#86533) Thanks @liaoyl830.
- Gateway: keep session-only Control UI tool-start mirrors flowing during diagnostic queue pressure instead of silently dropping non-terminal tool updates.
- Agents/memory: return optional not-found context for missing date-only daily memory reads instead of logging benign first-run `ENOENT` failures. Fixes #82928. Thanks @galiniliev.
- Discord: merge streamed text captions into following media block replies so captions and attachments send as one message. (#86487) Thanks @neeravmakwana.

View File

@@ -312,7 +312,7 @@ describe("maybeRepairGatewayDaemon", () => {
healthOk: false,
});
expect(readGatewayRestartHandoffSync).toHaveBeenCalledOnce();
expect(readGatewayRestartHandoffSync).toHaveBeenCalledTimes(2);
const [handoffEnv] = readGatewayRestartHandoffSync.mock.calls[0] as unknown as [
{ OPENCLAW_STATE_DIR?: string; OPENCLAW_CONFIG_PATH?: string },
];
@@ -324,12 +324,12 @@ describe("maybeRepairGatewayDaemon", () => {
);
});
it("does not read restart handoffs during normal doctor", async () => {
it("does not inspect port connections during normal doctor", async () => {
setPlatform("linux");
await runNonInteractiveRepair();
expect(readGatewayRestartHandoffSync).not.toHaveBeenCalled();
expect(readGatewayRestartHandoffSync).toHaveBeenCalled();
expect(inspectPortConnections).not.toHaveBeenCalled();
});
@@ -551,4 +551,74 @@ describe("maybeRepairGatewayDaemon", () => {
expect(service.install).not.toHaveBeenCalled();
expect(note).toHaveBeenCalledWith(EXTERNAL_SERVICE_REPAIR_NOTE, "Gateway LaunchAgent");
});
it("skips restart prompt when gateway is healthy after recent restart handoff in normal doctor flow", async () => {
vi.useFakeTimers();
vi.setSystemTime(40_000);
setPlatform("linux");
const handoff = {
kind: "gateway-supervisor-restart-handoff" as const,
version: 1 as const,
intentId: "intent-healthy",
pid: 99_999,
createdAt: 35_000,
expiresAt: 95_000,
reason: "update.run",
source: "gateway-update" as const,
restartKind: "update-process" as const,
supervisorMode: "systemd" as const,
} satisfies GatewayRestartHandoff;
readGatewayRestartHandoffSync.mockReturnValue(handoff);
await maybeRepairGatewayDaemon({
cfg: { gateway: {} },
runtime: { log: vi.fn(), error: vi.fn(), exit: vi.fn() },
prompter: createPrompter(() => true),
options: { deep: false },
gatewayDetailsMessage: "details",
healthOk: false,
});
expect(readGatewayRestartHandoffSync).toHaveBeenCalled();
expect(healthCommand).toHaveBeenCalledOnce();
expect(service.restart).not.toHaveBeenCalled();
expect(note).toHaveBeenCalledWith(
"Gateway is healthy after recent restart; skipping restart prompt.",
"Gateway",
);
});
it("prompts for restart when health probe fails despite recent restart handoff in normal doctor flow", async () => {
vi.useFakeTimers();
vi.setSystemTime(40_000);
setPlatform("linux");
const handoff = {
kind: "gateway-supervisor-restart-handoff" as const,
version: 1 as const,
intentId: "intent-unhealthy",
pid: 88_888,
createdAt: 35_000,
expiresAt: 95_000,
reason: "gateway.restart",
source: "operator-restart" as const,
restartKind: "full-process" as const,
supervisorMode: "systemd" as const,
} satisfies GatewayRestartHandoff;
readGatewayRestartHandoffSync.mockReturnValue(handoff);
healthCommand.mockRejectedValueOnce(new Error("gateway closed"));
await maybeRepairGatewayDaemon({
cfg: { gateway: {} },
runtime: { log: vi.fn(), error: vi.fn(), exit: vi.fn() },
prompter: createPrompter(() => false),
options: { deep: false },
gatewayDetailsMessage: "details",
healthOk: false,
});
expect(readGatewayRestartHandoffSync).toHaveBeenCalled();
expect(healthCommand).toHaveBeenCalledOnce();
expect(service.restart).not.toHaveBeenCalled();
// The restart prompt was shown but user declined (createPrompter returned false for it).
});
});

View File

@@ -388,11 +388,26 @@ export async function maybeRepairGatewayDaemon(params: {
note(EXTERNAL_SERVICE_REPAIR_NOTE, "Gateway");
return;
}
// Check if the gateway was recently restarted (e.g., via SIGUSR1 after an update).
// If a restart handoff exists and the gateway reports healthy, skip the restart prompt
// to avoid racing with the system supervisor and causing a restart loop.
const recentRestart = readGatewayRestartHandoffSync(serviceEnv);
if (recentRestart) {
try {
await healthCommand({ json: false, timeoutMs: 10_000 }, params.runtime);
note("Gateway is healthy after recent restart; skipping restart prompt.", "Gateway");
return;
} catch {
// Health probe failed — fall through to the restart prompt below.
}
}
const restart = await confirmDoctorServiceRepair(
params.prompter,
{
message: "Restart gateway service now?",
initialValue: true,
initialValue: false,
},
serviceRepairPolicy,
);