From 188d6fef24e3590faf54b812964deda8faea48c0 Mon Sep 17 00:00:00 2001 From: Egor Dementyev Date: Sun, 26 Apr 2026 12:50:36 +0300 Subject: [PATCH] Gateway: bound shutdown hook waits --- docs/automation/hooks.md | 4 +++ src/gateway/server-close.test.ts | 61 +++++++++++++++++++++++++++++++- src/gateway/server-close.ts | 42 ++++++++++++++++++++-- 3 files changed, 104 insertions(+), 3 deletions(-) diff --git a/docs/automation/hooks.md b/docs/automation/hooks.md index 13db57a7d7d..39a83fd5681 100644 --- a/docs/automation/hooks.md +++ b/docs/automation/hooks.md @@ -44,6 +44,8 @@ openclaw hooks info session-memory | `session:patch` | When session properties are modified | | `agent:bootstrap` | Before workspace bootstrap files are injected | | `gateway:startup` | After channels start and hooks are loaded | +| `gateway:shutdown` | When gateway shutdown begins | +| `gateway:pre-restart` | Before an expected gateway restart | | `message:received` | Inbound message from any channel | | `message:transcribed` | After audio transcription completes | | `message:preprocessed` | After all media and link understanding completes | @@ -131,6 +133,8 @@ lifecycle, not an agent-finalization gate. Plugins that need to inspect a natural final answer and ask the agent for one more pass should use the typed plugin hook `before_agent_finalize` instead. See [Plugin hooks](/plugins/hooks). +**Gateway lifecycle events**: `gateway:shutdown` includes `reason` and `restartExpectedMs` and fires when gateway shutdown begins. `gateway:pre-restart` includes the same context but only fires when shutdown is part of an expected restart and a finite `restartExpectedMs` value is supplied. During shutdown, each lifecycle hook wait is best-effort and bounded so shutdown continues if a handler stalls. + ## Hook discovery Hooks are discovered from these directories, in order of increasing override precedence: diff --git a/src/gateway/server-close.test.ts b/src/gateway/server-close.test.ts index f40faa35076..5c2f17f683a 100644 --- a/src/gateway/server-close.test.ts +++ b/src/gateway/server-close.test.ts @@ -1,4 +1,4 @@ -import { beforeEach, describe, expect, it, vi } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { InternalHookEvent } from "../hooks/internal-hooks.js"; type TriggerInternalHookMock = (event: InternalHookEvent) => Promise; @@ -13,6 +13,7 @@ const WEBSOCKET_CLOSE_GRACE_MS = 1_000; const WEBSOCKET_CLOSE_FORCE_CONTINUE_MS = 250; const HTTP_CLOSE_GRACE_MS = 1_000; const HTTP_CLOSE_FORCE_WAIT_MS = 5_000; +const GATEWAY_LIFECYCLE_HOOK_TIMEOUT_MS = 1_000; vi.mock("../channels/plugins/index.js", async () => ({ ...(await vi.importActual( @@ -106,6 +107,10 @@ describe("createGatewayCloseHandler", () => { mocks.triggerInternalHook.mockResolvedValue(undefined); }); + afterEach(() => { + vi.useRealTimers(); + }); + it("emits gateway shutdown and pre-restart hooks", async () => { const close = createGatewayCloseHandler(createGatewayCloseTestDeps()); @@ -131,6 +136,60 @@ describe("createGatewayCloseHandler", () => { }); }); + it("continues shutdown when gateway shutdown hook stalls", async () => { + vi.useFakeTimers(); + mocks.triggerInternalHook.mockImplementation((event: InternalHookEvent) => { + if (event.action === "shutdown") { + return new Promise(() => undefined); + } + return Promise.resolve(undefined); + }); + const stopTaskRegistryMaintenance = vi.fn(); + const close = createGatewayCloseHandler( + createGatewayCloseTestDeps({ stopTaskRegistryMaintenance }), + ); + + const closePromise = close({ reason: "test shutdown" }); + await vi.advanceTimersByTimeAsync(GATEWAY_LIFECYCLE_HOOK_TIMEOUT_MS); + await closePromise; + + expect(stopTaskRegistryMaintenance).toHaveBeenCalledTimes(1); + expect( + mocks.logWarn.mock.calls.some(([message]) => + String(message).includes("gateway:shutdown hook timed out after 1000ms"), + ), + ).toBe(true); + }); + + it("continues restart shutdown when gateway pre-restart hook stalls", async () => { + vi.useFakeTimers(); + mocks.triggerInternalHook.mockImplementation((event: InternalHookEvent) => { + if (event.action === "pre-restart") { + return new Promise(() => undefined); + } + return Promise.resolve(undefined); + }); + const stopTaskRegistryMaintenance = vi.fn(); + const close = createGatewayCloseHandler( + createGatewayCloseTestDeps({ stopTaskRegistryMaintenance }), + ); + + const closePromise = close({ + reason: "test restart", + restartExpectedMs: 123, + }); + await vi.advanceTimersByTimeAsync(GATEWAY_LIFECYCLE_HOOK_TIMEOUT_MS); + await closePromise; + + expect(stopTaskRegistryMaintenance).toHaveBeenCalledTimes(1); + expect(mocks.triggerInternalHook).toHaveBeenCalledTimes(2); + expect( + mocks.logWarn.mock.calls.some(([message]) => + String(message).includes("gateway:pre-restart hook timed out after 1000ms"), + ), + ).toBe(true); + }); + it("unsubscribes lifecycle listeners during shutdown", async () => { const lifecycleUnsub = vi.fn(); const stopTaskRegistryMaintenance = vi.fn(); diff --git a/src/gateway/server-close.ts b/src/gateway/server-close.ts index 51f3fa6d1cc..00e6382b5e7 100644 --- a/src/gateway/server-close.ts +++ b/src/gateway/server-close.ts @@ -12,6 +12,8 @@ import type { PluginServicesHandle } from "../plugins/services.js"; import { normalizeOptionalString } from "../shared/string-coerce.js"; const shutdownLog = createSubsystemLogger("gateway/shutdown"); +const GATEWAY_SHUTDOWN_HOOK_TIMEOUT_MS = 1_000; +const GATEWAY_PRE_RESTART_HOOK_TIMEOUT_MS = 1_000; const WEBSOCKET_CLOSE_GRACE_MS = 1_000; const WEBSOCKET_CLOSE_FORCE_CONTINUE_MS = 250; const HTTP_CLOSE_GRACE_MS = 1_000; @@ -45,6 +47,34 @@ function createTimeoutRace(timeoutMs: number, onTimeout: () => T) { }; } +async function triggerGatewayLifecycleHookWithTimeout(params: { + event: ReturnType; + hookName: "gateway:shutdown" | "gateway:pre-restart"; + timeoutMs: number; +}): Promise { + let timeout: ReturnType | undefined; + const hookPromise = triggerInternalHook(params.event); + void hookPromise.catch(() => undefined); + try { + const result = await Promise.race([ + hookPromise.then(() => "completed" as const), + new Promise<"timeout">((resolve) => { + timeout = setTimeout(() => resolve("timeout"), params.timeoutMs); + timeout.unref?.(); + }), + ]); + if (result === "timeout") { + shutdownLog.warn( + `${params.hookName} hook timed out after ${params.timeoutMs}ms; continuing shutdown`, + ); + } + } finally { + if (timeout) { + clearTimeout(timeout); + } + } +} + export async function runGatewayClosePrelude(params: { stopDiagnostics?: () => void; clearSkillsRefreshTimer?: () => void; @@ -119,7 +149,11 @@ export function createGatewayCloseHandler(params: { reason, restartExpectedMs, }); - await triggerInternalHook(shutdownEvent); + await triggerGatewayLifecycleHookWithTimeout({ + event: shutdownEvent, + hookName: "gateway:shutdown", + timeoutMs: GATEWAY_SHUTDOWN_HOOK_TIMEOUT_MS, + }); if (restartExpectedMs !== null) { const preRestartEvent = createInternalHookEvent( "gateway", @@ -130,7 +164,11 @@ export function createGatewayCloseHandler(params: { restartExpectedMs, }, ); - await triggerInternalHook(preRestartEvent); + await triggerGatewayLifecycleHookWithTimeout({ + event: preRestartEvent, + hookName: "gateway:pre-restart", + timeoutMs: GATEWAY_PRE_RESTART_HOOK_TIMEOUT_MS, + }); } } catch { // Best-effort only; shutdown should proceed even if hooks fail.