fix: harden Windows gateway restart fallback (#69056)

Thanks @Thatgfsj.
This commit is contained in:
Thatgfsj
2026-04-28 14:57:47 +08:00
committed by GitHub
parent 07c653e913
commit 3dff1272e9
3 changed files with 142 additions and 4 deletions

View File

@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
- Build/runtime: write the runtime-postbuild stamp after `pnpm build` writes the build stamp, so the next CLI invocation does not re-sync runtime artifacts after a successful build. Fixes #73151. Thanks @bittoby.
- CLI/model probes: reject empty or whitespace-only `infer model run --prompt` values before calling local providers or the Gateway, so smoke checks do not spend provider calls on invalid turns. Fixes #73185. Thanks @iot2edge.
- Gateway/media: route text-only `chat.send` image offloads through media-understanding fields so `agents.defaults.imageModel` can describe WebChat attachments instead of leaving only an opaque `media://inbound` marker. Fixes #72968. Thanks @vorajeeah.
- Gateway/Windows: route no-listener restart handoffs through the Windows supervisor without leaving restart tokens in flight, so failed task scheduling can be retried and successful handoffs do not coalesce later restart requests. (#69056) Thanks @Thatgfsj.
- Gateway/sessions: remove automatic oversized `sessions.json` rotation backups, deprecate `session.maintenance.rotateBytes`, and teach `openclaw doctor --fix` to remove the ignored key so hot session writes no longer copy multi-MB stores. Refs #72338. Thanks @midhunmonachan and @DougButdorf.
- Channels/Telegram: fail fast when Telegram rejects the startup `getMe` token probe with 401, so invalid or stale BotFather tokens are reported as token auth failures instead of misleading `deleteWebhook` cleanup failures. Fixes #47674. Thanks @samaedan-arch.
- ACPX: keep generated Codex and Claude ACP wrapper startup paths working when remote or special state filesystems reject chmod, since OpenClaw invokes the wrappers through Node instead of executing them directly. Fixes #73333. Thanks @david-garcia-garcia.

View File

@@ -19,10 +19,76 @@ import {
} from "./restart.js";
import { listTailnetAddresses } from "./tailnet.js";
// Mock functions are created through vi.hoisted() so that the vi.mock()
// factories below can reference them (vitest hoists vi.mock calls; see the
// vitest documentation for vi.hoisted / vi.mock).
const relaunchGatewayScheduledTaskMock = vi.hoisted(() => vi.fn());
const cleanStaleGatewayProcessesSyncMock = vi.hoisted(() => vi.fn());
const findGatewayPidsOnPortSyncMock = vi.hoisted(() => vi.fn());
// Replace the stale-pid helpers with thin forwarders to the mocks above so
// each test can control their return values.
vi.mock("./restart-stale-pids.js", () => ({
cleanStaleGatewayProcessesSync: (...args: unknown[]) =>
cleanStaleGatewayProcessesSyncMock(...args),
findGatewayPidsOnPortSync: (...args: unknown[]) => findGatewayPidsOnPortSyncMock(...args),
}));
// Replace the Windows scheduled-task relaunch with a forwarder to its mock so
// tests can assert whether (and how often) the fallback was invoked.
vi.mock("./windows-task-restart.js", () => ({
relaunchGatewayScheduledTask: (...args: unknown[]) => relaunchGatewayScheduledTaskMock(...args),
}));
// Snapshot of the real `process.platform` property descriptor, taken once at
// module load so tests can override the platform and later restore it.
const originalPlatformDescriptor = Object.getOwnPropertyDescriptor(process, "platform");

/**
 * Overrides `process.platform` with the given value.
 *
 * The override reuses every attribute of the captured descriptor and swaps
 * only `value`. When no descriptor was captured the call is a no-op.
 *
 * @param platform - Platform identifier that `process.platform` should report.
 */
function setPlatform(platform: NodeJS.Platform): void {
  if (originalPlatformDescriptor) {
    const overriddenDescriptor: PropertyDescriptor = {
      ...originalPlatformDescriptor,
      value: platform,
    };
    Object.defineProperty(process, "platform", overriddenDescriptor);
  }
}
/**
 * Runs `fn` while the process has no SIGUSR1 listeners, then restores the
 * listeners that were registered beforehand.
 *
 * Any SIGUSR1 listener that `fn` itself registers is discarded before the
 * saved listeners are re-attached. Restoration happens in `finally`, so it
 * also runs when `fn` throws (the error still propagates).
 *
 * @param fn - Synchronous callback to execute with SIGUSR1 listeners removed.
 */
function withoutSigusr1Listeners(fn: () => void): void {
  // rawListeners() returns the wrappers created by process.once(), whereas
  // listeners() returns the unwrapped functions. Re-attaching the raw entries
  // keeps one-shot listeners one-shot instead of silently turning them into
  // persistent listeners.
  const savedListeners = process.rawListeners("SIGUSR1");
  process.removeAllListeners("SIGUSR1");
  try {
    fn();
  } finally {
    process.removeAllListeners("SIGUSR1");
    for (const listener of savedListeners) {
      // rawListeners() is typed loosely; every entry was previously accepted
      // by process.on/once for this same event, so it is a valid listener.
      process.on("SIGUSR1", listener as () => void);
    }
  }
}
/**
 * Runs `fn` with the `VITEST` and `NODE_ENV` environment variables removed,
 * then puts both back exactly as they were (including "not set").
 *
 * NOTE(review): presumably the restart code skips the supervisor handoff when
 * it detects a test environment through these variables; confirm against the
 * restart module.
 *
 * @param fn - Synchronous callback to execute with the variables unset.
 */
function withRestartSupervisorEnabled(fn: () => void): void {
  const maskedKeys = ["VITEST", "NODE_ENV"] as const;
  const snapshot = maskedKeys.map((key) => [key, process.env[key]] as const);
  for (const key of maskedKeys) {
    delete process.env[key];
  }
  try {
    fn();
  } finally {
    for (const [key, previous] of snapshot) {
      if (previous !== undefined) {
        process.env[key] = previous;
      } else {
        // Assigning undefined would store the string "undefined"; delete instead.
        delete process.env[key];
      }
    }
  }
}
describe("infra runtime", () => {
function setupRestartSignalSuite() {
// Per-test setup: reset the restart module's SIGUSR1 state and every mock,
// then install default return values so tests only override what they need.
beforeEach(() => {
__testing.resetSigusr1State();
relaunchGatewayScheduledTaskMock.mockReset();
// Default: the scheduled-task relaunch reports success.
relaunchGatewayScheduledTaskMock.mockReturnValue({ ok: true, method: "schtasks" });
cleanStaleGatewayProcessesSyncMock.mockReset();
cleanStaleGatewayProcessesSyncMock.mockReturnValue([]);
findGatewayPidsOnPortSyncMock.mockReset();
findGatewayPidsOnPortSyncMock.mockReturnValue([]);
vi.useFakeTimers();
// Stub process.kill so no real signal is delivered to the test process.
vi.spyOn(process, "kill").mockImplementation(() => true);
});
@@ -33,6 +99,9 @@ describe("infra runtime", () => {
clearConfigCache();
await vi.runOnlyPendingTimersAsync();
vi.useRealTimers();
if (originalPlatformDescriptor) {
Object.defineProperty(process, "platform", originalPlatformDescriptor);
}
vi.restoreAllMocks();
});
}
@@ -80,6 +149,53 @@ describe("infra runtime", () => {
}
});
// With a SIGUSR1 listener registered, the restart must be delivered through
// process.emit even when the platform reports win32; the scheduled-task
// relaunch mock must stay untouched.
it("uses the SIGUSR1 listener path on Windows when the run loop is active", () => {
setPlatform("win32");
const emitSpy = vi.spyOn(process, "emit");
const handler = () => {};
process.on("SIGUSR1", handler);
try {
expect(emitGatewayRestart()).toBe(true);
expect(emitSpy).toHaveBeenCalledWith("SIGUSR1");
expect(relaunchGatewayScheduledTaskMock).not.toHaveBeenCalled();
} finally {
// Always detach the temporary listener so it cannot leak into other tests.
process.removeListener("SIGUSR1", handler);
}
});
// With no SIGUSR1 listener on win32, a successful scheduled-task handoff must
// leave no pending authorization behind, and a later restart request must be
// scheduled as a fresh (non-coalesced) "supervisor" restart.
it("uses the Windows supervisor fallback without leaving a restart cycle in flight", () => {
setPlatform("win32");
withoutSigusr1Listeners(() => {
withRestartSupervisorEnabled(() => {
relaunchGatewayScheduledTaskMock.mockReturnValueOnce({ ok: true, method: "schtasks" });
expect(emitGatewayRestart("windows-fallback")).toBe(true);
expect(relaunchGatewayScheduledTaskMock).toHaveBeenCalledTimes(1);
// Nothing should be left to consume after the handoff completed.
expect(consumeGatewaySigusr1RestartAuthorization()).toBe(false);
const next = scheduleGatewaySigusr1Restart({ delayMs: 0, reason: "next" });
expect(next.coalesced).toBe(false);
expect(next.mode).toBe("supervisor");
});
});
});
// When the scheduled-task handoff fails, emitGatewayRestart must report
// failure, leave no authorization pending, and allow an immediate retry.
it("rolls back the Windows supervisor fallback when scheduling fails", () => {
setPlatform("win32");
withoutSigusr1Listeners(() => {
withRestartSupervisorEnabled(() => {
// First relaunch attempt fails, second succeeds.
relaunchGatewayScheduledTaskMock
.mockReturnValueOnce({ ok: false, method: "schtasks", detail: "denied" })
.mockReturnValueOnce({ ok: true, method: "schtasks" });
expect(emitGatewayRestart("windows-fallback")).toBe(false);
expect(consumeGatewaySigusr1RestartAuthorization()).toBe(false);
expect(emitGatewayRestart("windows-retry")).toBe(true);
expect(relaunchGatewayScheduledTaskMock).toHaveBeenCalledTimes(2);
});
});
});
it("coalesces duplicate scheduled restarts into a single pending timer", async () => {
const emitSpy = vi.spyOn(process, "emit");
const handler = () => {};

View File

@@ -265,14 +265,28 @@ export function emitGatewayRestart(reasonOverride?: string): boolean {
authorizeGatewaySigusr1Restart();
try {
if (process.listenerCount("SIGUSR1") > 0) {
// Signal path: let the run-loop's SIGUSR1 handler drive restart.
// Works on all platforms including Windows when a listener is registered.
process.emit("SIGUSR1");
} else if (process.platform === "win32") {
// On Windows with no SIGUSR1 listener, fall back to task-scheduler handoff.
// triggerOpenClawRestart() uses schtasks to restart the gateway.
const result = triggerOpenClawRestart();
if (!result.ok) {
// Roll back the cycle marker so future restart requests can still proceed.
rollBackGatewayRestartEmission();
restartLog.warn("Windows scheduled task restart failed, token rolled back");
return false;
}
consumeGatewaySigusr1RestartAuthorization();
markGatewaySigusr1RestartHandled();
} else {
// Unix without listener: send signal directly.
process.kill(process.pid, "SIGUSR1");
}
} catch {
// Roll back the cycle marker so future restart requests can still proceed.
emittedRestartToken = consumedRestartToken;
emittedRestartReason = undefined;
rollBackGatewayRestartEmission();
return false;
}
lastRestartEmittedAt = Date.now();
@@ -335,6 +349,12 @@ export function markGatewaySigusr1RestartHandled(): void {
}
}
/**
 * Undoes the bookkeeping of a restart emission that did not go through.
 *
 * Resets the emitted-token marker to the last consumed token, clears the
 * recorded restart reason, and consumes any outstanding SIGUSR1 restart
 * authorization (the return value is intentionally ignored) so that future
 * restart requests can still proceed.
 */
function rollBackGatewayRestartEmission(): void {
emittedRestartToken = consumedRestartToken;
emittedRestartReason = undefined;
consumeGatewaySigusr1RestartAuthorization();
}
export type RestartDeferralHooks = {
onDeferring?: (pending: number) => void;
onStillPending?: (pending: number, elapsedMs: number) => void;
@@ -617,7 +637,7 @@ export type ScheduledRestart = {
signal: "SIGUSR1";
delayMs: number;
reason?: string;
mode: "emit" | "signal";
mode: "emit" | "signal" | "supervisor";
coalesced: boolean;
cooldownMsApplied: number;
};
@@ -637,7 +657,8 @@ export function scheduleGatewaySigusr1Restart(opts?: {
typeof opts?.reason === "string" && opts.reason.trim()
? opts.reason.trim().slice(0, 200)
: undefined;
const mode = process.listenerCount("SIGUSR1") > 0 ? "emit" : "signal";
const hasSigusr1Listener = process.listenerCount("SIGUSR1") > 0;
const mode = hasSigusr1Listener ? "emit" : process.platform === "win32" ? "supervisor" : "signal";
const nowMs = Date.now();
const cooldownMsApplied = Math.max(0, lastRestartEmittedAt + RESTART_COOLDOWN_MS - nowMs);
const requestedDueAt = nowMs + delayMs + cooldownMsApplied;