fix: land SIGUSR1 orphan recovery regressions (#47719) (thanks @joeykrug)

This commit is contained in:
Peter Steinberger
2026-03-16 05:31:41 +00:00
parent 98f6ec50aa
commit 680eff63fb
7 changed files with 64 additions and 11 deletions

View File

@@ -8,6 +8,15 @@ const acquireGatewayLock = vi.fn(async (_opts?: { port?: number }) => ({
// Mock doubles for the gateway restart helpers. Defaults model an authorized,
// in-process SIGUSR1 restart; individual tests override them per call.
const consumeGatewaySigusr1RestartAuthorization = vi.fn((): boolean => true);
const isGatewaySigusr1RestartExternallyAllowed = vi.fn((): boolean => false);
const markGatewaySigusr1RestartHandled = vi.fn();
// Scheduler double: ignores its options and always reports an immediate,
// non-coalesced "emit"-mode restart targeting the current process.
const scheduleGatewaySigusr1Restart = vi.fn((_opts?: { delayMs?: number; reason?: string }) => {
  // A fresh result object is built on every call so callers never share state.
  return {
    ok: true,
    pid: process.pid,
    signal: "SIGUSR1" as const,
    delayMs: 0,
    mode: "emit" as const,
    coalesced: false,
    cooldownMsApplied: 0,
  };
});
// Task-tracking doubles: no active tasks by default, and waiting always
// resolves as drained.
const getActiveTaskCount = vi.fn((): number => 0);
const markGatewayDraining = vi.fn();
const waitForActiveTasks = vi.fn(async (_timeoutMs: number) => {
  return { drained: true };
});
@@ -35,6 +44,8 @@ vi.mock("../../infra/restart.js", () => ({
consumeGatewaySigusr1RestartAuthorization: () => consumeGatewaySigusr1RestartAuthorization(),
isGatewaySigusr1RestartExternallyAllowed: () => isGatewaySigusr1RestartExternallyAllowed(),
markGatewaySigusr1RestartHandled: () => markGatewaySigusr1RestartHandled(),
scheduleGatewaySigusr1Restart: (opts?: { delayMs?: number; reason?: string }) =>
scheduleGatewaySigusr1Restart(opts),
}));
vi.mock("../../infra/process-respawn.js", () => ({
@@ -292,6 +303,28 @@ describe("runGatewayLoop", () => {
});
});
it("routes external SIGUSR1 through the restart scheduler before draining", async () => {
  vi.clearAllMocks();
  // Model an externally delivered signal: no in-process authorization is
  // available for the next check, but external SIGUSR1 restarts are allowed.
  isGatewaySigusr1RestartExternallyAllowed.mockReturnValueOnce(true);
  consumeGatewaySigusr1RestartAuthorization.mockReturnValueOnce(false);

  await withIsolatedSignals(async ({ captureSignal }) => {
    const { close: closeGateway, start: startGateway } = await createSignaledLoopHarness();
    // captureSignal hands back a callable; invoking it drives the SIGUSR1 path
    // under test (presumably the handler the loop registered — confirm in harness).
    const emitSigusr1 = captureSignal("SIGUSR1");
    emitSigusr1();

    // Yield one setImmediate turn so any work queued by the signal can run
    // before asserting.
    await new Promise<void>((resolve) => {
      setImmediate(resolve);
    });

    // The request must be handed to the scheduler with no delay...
    expect(scheduleGatewaySigusr1Restart).toHaveBeenCalledWith({
      delayMs: 0,
      reason: "SIGUSR1",
    });
    // ...while the running gateway is neither closed nor started a second
    // time, and the signal is not marked as handled.
    expect(closeGateway).not.toHaveBeenCalled();
    expect(startGateway).toHaveBeenCalledTimes(1);
    expect(markGatewaySigusr1RestartHandled).not.toHaveBeenCalled();
  });
});
it("releases the lock before exiting on spawned restart", async () => {
vi.clearAllMocks();

View File

@@ -10,6 +10,7 @@ import {
consumeGatewaySigusr1RestartAuthorization,
isGatewaySigusr1RestartExternallyAllowed,
markGatewaySigusr1RestartHandled,
scheduleGatewaySigusr1Restart,
} from "../../infra/restart.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import {
@@ -186,10 +187,20 @@ export async function runGatewayLoop(params: {
const onSigusr1 = () => {
gatewayLog.info("signal SIGUSR1 received");
const authorized = consumeGatewaySigusr1RestartAuthorization();
if (!authorized && !isGatewaySigusr1RestartExternallyAllowed()) {
gatewayLog.warn(
"SIGUSR1 restart ignored (not authorized; commands.restart=false or use gateway tool).",
);
if (!authorized) {
if (!isGatewaySigusr1RestartExternallyAllowed()) {
gatewayLog.warn(
"SIGUSR1 restart ignored (not authorized; commands.restart=false or use gateway tool).",
);
return;
}
if (shuttingDown) {
gatewayLog.info("received SIGUSR1 during shutdown; ignoring");
return;
}
// External SIGUSR1 requests should still reuse the in-process restart
// scheduler so idle drain and restart coalescing stay consistent.
scheduleGatewaySigusr1Restart({ delayMs: 0, reason: "SIGUSR1" });
return;
}
markGatewaySigusr1RestartHandled();