diff --git a/CHANGELOG.md b/CHANGELOG.md
index edf3d1c5335..cddc3cb182e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
 - Tests: clean successful plugin gateway gauntlet isolated temp roots while keeping an explicit preservation switch for failed/debug runs.
 - Plugins/perf: reuse derived plugin metadata snapshots for the lifetime of the process so reply-time skill setup no longer rescans plugin metadata on every turn.
 - Discord/OpenAI voice: keep wake-name master consults using the current speaker context after ignored ambient transcripts and shorten the default capture silence grace.
+- Doctor: skip redundant Gateway restart prompts when a recent supervisor restart leaves the Gateway healthy. Fixes #86518. (#86533) Thanks @liaoyl830.
 - Gateway: keep session-only Control UI tool-start mirrors flowing during diagnostic queue pressure instead of silently dropping non-terminal tool updates.
 - Agents/memory: return optional not-found context for missing date-only daily memory reads instead of logging benign first-run `ENOENT` failures. Fixes #82928. Thanks @galiniliev.
 - Discord: merge streamed text captions into following media block replies so captions and attachments send as one message. (#86487) Thanks @neeravmakwana.
diff --git a/src/commands/doctor-gateway-daemon-flow.test.ts b/src/commands/doctor-gateway-daemon-flow.test.ts
index 2c8fee35390..f9c493a4353 100644
--- a/src/commands/doctor-gateway-daemon-flow.test.ts
+++ b/src/commands/doctor-gateway-daemon-flow.test.ts
@@ -312,7 +312,7 @@ describe("maybeRepairGatewayDaemon", () => {
       healthOk: false,
     });
 
-    expect(readGatewayRestartHandoffSync).toHaveBeenCalledOnce();
+    expect(readGatewayRestartHandoffSync).toHaveBeenCalledTimes(2);
     const [handoffEnv] = readGatewayRestartHandoffSync.mock.calls[0] as unknown as [
       { OPENCLAW_STATE_DIR?: string; OPENCLAW_CONFIG_PATH?: string },
     ];
@@ -324,12 +324,12 @@ describe("maybeRepairGatewayDaemon", () => {
     );
   });
 
-  it("does not read restart handoffs during normal doctor", async () => {
+  it("does not inspect port connections during normal doctor", async () => {
     setPlatform("linux");
 
     await runNonInteractiveRepair();
 
-    expect(readGatewayRestartHandoffSync).not.toHaveBeenCalled();
+    expect(readGatewayRestartHandoffSync).toHaveBeenCalled();
     expect(inspectPortConnections).not.toHaveBeenCalled();
   });
 
@@ -551,4 +551,79 @@ describe("maybeRepairGatewayDaemon", () => {
     expect(service.install).not.toHaveBeenCalled();
     expect(note).toHaveBeenCalledWith(EXTERNAL_SERVICE_REPAIR_NOTE, "Gateway LaunchAgent");
   });
+
+  it("skips restart prompt when gateway is healthy after recent restart handoff in normal doctor flow", async () => {
+    vi.useFakeTimers();
+    vi.setSystemTime(40_000);
+    setPlatform("linux");
+    const handoff = {
+      kind: "gateway-supervisor-restart-handoff" as const,
+      version: 1 as const,
+      intentId: "intent-healthy",
+      pid: 99_999,
+      createdAt: 35_000,
+      expiresAt: 95_000,
+      reason: "update.run",
+      source: "gateway-update" as const,
+      restartKind: "update-process" as const,
+      supervisorMode: "systemd" as const,
+    } satisfies GatewayRestartHandoff;
+    readGatewayRestartHandoffSync.mockReturnValue(handoff);
+
+    await maybeRepairGatewayDaemon({
+      cfg: { gateway: {} },
+      runtime: { log: vi.fn(), error: vi.fn(), exit: vi.fn() },
+      prompter: createPrompter(() => true),
+      options: { deep: false },
+      gatewayDetailsMessage: "details",
+      healthOk: false,
+    });
+
+    expect(readGatewayRestartHandoffSync).toHaveBeenCalled();
+    expect(healthCommand).toHaveBeenCalledOnce();
+    expect(service.restart).not.toHaveBeenCalled();
+    expect(note).toHaveBeenCalledWith(
+      "Gateway is healthy after recent restart; skipping restart prompt.",
+      "Gateway",
+    );
+  });
+
+  it("prompts for restart when health probe fails despite recent restart handoff in normal doctor flow", async () => {
+    vi.useFakeTimers();
+    vi.setSystemTime(40_000);
+    setPlatform("linux");
+    const handoff = {
+      kind: "gateway-supervisor-restart-handoff" as const,
+      version: 1 as const,
+      intentId: "intent-unhealthy",
+      pid: 88_888,
+      createdAt: 35_000,
+      expiresAt: 95_000,
+      reason: "gateway.restart",
+      source: "operator-restart" as const,
+      restartKind: "full-process" as const,
+      supervisorMode: "systemd" as const,
+    } satisfies GatewayRestartHandoff;
+    readGatewayRestartHandoffSync.mockReturnValue(handoff);
+    healthCommand.mockRejectedValueOnce(new Error("gateway closed"));
+
+    await maybeRepairGatewayDaemon({
+      cfg: { gateway: {} },
+      runtime: { log: vi.fn(), error: vi.fn(), exit: vi.fn() },
+      prompter: createPrompter(() => false),
+      options: { deep: false },
+      gatewayDetailsMessage: "details",
+      healthOk: false,
+    });
+
+    expect(readGatewayRestartHandoffSync).toHaveBeenCalled();
+    expect(healthCommand).toHaveBeenCalledOnce();
+    // A failed probe must fall through to the restart prompt rather than report the
+    // gateway healthy; the prompter declines here, so no restart is attempted.
+    expect(note).not.toHaveBeenCalledWith(
+      "Gateway is healthy after recent restart; skipping restart prompt.",
+      "Gateway",
+    );
+    expect(service.restart).not.toHaveBeenCalled();
+  });
 });
diff --git a/src/commands/doctor-gateway-daemon-flow.ts b/src/commands/doctor-gateway-daemon-flow.ts
index 14b83e1de5d..360196e81a2 100644
--- a/src/commands/doctor-gateway-daemon-flow.ts
+++ b/src/commands/doctor-gateway-daemon-flow.ts
@@ -388,11 +388,26 @@ export async function maybeRepairGatewayDaemon(params: {
       note(EXTERNAL_SERVICE_REPAIR_NOTE, "Gateway");
       return;
     }
+
+    // Check if the gateway was recently restarted (e.g., via SIGUSR1 after an update).
+    // If a restart handoff exists and the gateway reports healthy, skip the restart prompt
+    // to avoid racing with the system supervisor and causing a restart loop.
+    const recentRestart = readGatewayRestartHandoffSync(serviceEnv);
+    if (recentRestart) {
+      try {
+        await healthCommand({ json: false, timeoutMs: 10_000 }, params.runtime);
+        note("Gateway is healthy after recent restart; skipping restart prompt.", "Gateway");
+        return;
+      } catch {
+        // Health probe failed — fall through to the restart prompt below.
+      }
+    }
+
     const restart = await confirmDoctorServiceRepair(
       params.prompter,
       {
         message: "Restart gateway service now?",
         initialValue: true,
       },
       serviceRepairPolicy,
     );