From d16efadc00d452a1db67fd6823c7a8ddf2a16460 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 16 May 2026 12:50:38 +0100 Subject: [PATCH] fix(gateway): quiet startup retry closes Co-authored-by: JARVIS-Glasses <284122573+JARVIS-Glasses@users.noreply.github.com> Co-authored-by: WhatsSkiLL <284126683+IWhatsskill@users.noreply.github.com> --- CHANGELOG.md | 1 + src/gateway/protocol/startup-unavailable.ts | 3 ++ .../server/ws-connection.startup.test.ts | 30 +++++++++++++++++-- src/gateway/server/ws-connection.ts | 13 ++++++-- .../server/ws-connection/message-handler.ts | 7 +++-- 5 files changed, 46 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27db072b806..aaaaf8f0328 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai - CLI/dashboard: probe Gateway readiness before handing out the dashboard URL, prompting to start or install the managed service when the Gateway is stopped and printing recovery commands instead of opening a dead browser tab. - CLI: hide decorative startup and status emoji on terminals that are unlikely to render them correctly, keeping semantic message and identity emoji intact. - CLI/gateway: recover the Linux user systemd bus environment when `openclaw dashboard` starts the Gateway from stripped desktop shells such as VNC terminals. +- Gateway/WebSocket: log expected startup `1013 gateway starting` retry closes at debug instead of warn while preserving WARN for unexpected pre-connect failures. Fixes #76361. (#82457) Thanks @IWhatsskill. - CLI/context engines: bootstrap and finalize non-legacy context engines for CLI turns while preserving transcript snapshots and deferred maintenance ownership. (#81869) Thanks @sahilsatralkar. - Telegram: persist polling updates through restart replay so queued same-topic messages resume in order instead of losing context after a gateway restart. (#82256) Thanks @VACInc. - Gateway/Gmail: abort in-flight Gmail watcher startup and hot-reload restarts before shutdown so reloads cannot spawn `gog serve` after the Gateway is closing. Thanks @frankekn. diff --git a/src/gateway/protocol/startup-unavailable.ts b/src/gateway/protocol/startup-unavailable.ts index d30e853a51f..dffadcc7bcb 100644 --- a/src/gateway/protocol/startup-unavailable.ts +++ b/src/gateway/protocol/startup-unavailable.ts @@ -1,4 +1,7 @@ export const GATEWAY_STARTUP_UNAVAILABLE_REASON = "startup-sidecars"; +export const GATEWAY_STARTUP_PENDING_CLOSE_CAUSE = "startup-sidecars-pending"; +export const GATEWAY_STARTUP_CLOSE_CODE = 1013; +export const GATEWAY_STARTUP_CLOSE_REASON = "gateway starting"; export const GATEWAY_STARTUP_RETRY_AFTER_MS = 500; const GATEWAY_STARTUP_RETRY_MIN_MS = 100; const GATEWAY_STARTUP_RETRY_MAX_MS = 2_000; diff --git a/src/gateway/server/ws-connection.startup.test.ts b/src/gateway/server/ws-connection.startup.test.ts index 9bd161fc55d..0af25d5ebb8 100644 --- a/src/gateway/server/ws-connection.startup.test.ts +++ b/src/gateway/server/ws-connection.startup.test.ts @@ -3,7 +3,12 @@ import { describe, expect, it, vi } from "vitest"; import type { WebSocketServer } from "ws"; import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../protocol/client-info.js"; import { PROTOCOL_VERSION } from "../protocol/index.js"; -import { GATEWAY_STARTUP_UNAVAILABLE_REASON } from "../protocol/startup-unavailable.js"; +import { + GATEWAY_STARTUP_CLOSE_CODE, + GATEWAY_STARTUP_CLOSE_REASON, + GATEWAY_STARTUP_PENDING_CLOSE_CAUSE, + GATEWAY_STARTUP_UNAVAILABLE_REASON, +} from "../protocol/startup-unavailable.js"; import { attachGatewayWsConnectionHandler } from "./ws-connection.js"; function createLogger() { @@ -51,6 +56,7 @@ describe("attachGatewayWsConnectionHandler startup readiness", () => { headers: { host: "127.0.0.1:19001" }, socket: { localAddress: "127.0.0.1" }, }; + const logWsControl = createLogger(); attachGatewayWsConnectionHandler({ wss, @@ -64,7 +70,7 @@ describe("attachGatewayWsConnectionHandler startup readiness", () => { refreshHealthSnapshot: vi.fn(async () => ({}) as never), logGateway: createLogger() as never, logHealth: createLogger() as never, - logWsControl: createLogger() as never, + logWsControl: logWsControl as never, extraHandlers: {}, broadcast: vi.fn(), buildRequestContext: () => createRequestContext() as never, @@ -134,7 +140,25 @@ describe("attachGatewayWsConnectionHandler startup readiness", () => { expect(response?.error?.retryAfterMs).toBe(500); expect(response?.error?.details).toEqual({ reason: GATEWAY_STARTUP_UNAVAILABLE_REASON }); await vi.waitFor(() => { - expect(socket.close).toHaveBeenCalledWith(1013, "gateway starting"); + expect(socket.close).toHaveBeenCalledWith( + GATEWAY_STARTUP_CLOSE_CODE, + GATEWAY_STARTUP_CLOSE_REASON, + ); }); + expect(logWsControl.debug).toHaveBeenCalledWith( + expect.stringContaining("closed before connect"), + expect.objectContaining({ + cause: GATEWAY_STARTUP_PENDING_CLOSE_CAUSE, + handshake: "failed", + }), + ); + expect(logWsControl.debug).toHaveBeenCalledWith( + expect.stringContaining(`code=${GATEWAY_STARTUP_CLOSE_CODE}`), + expect.anything(), + ); + expect(logWsControl.warn).not.toHaveBeenCalledWith( + expect.stringContaining("closed before connect"), + expect.anything(), + ); }); }); diff --git a/src/gateway/server/ws-connection.ts b/src/gateway/server/ws-connection.ts index e9167d6d459..69acb3c5d6e 100644 --- a/src/gateway/server/ws-connection.ts +++ b/src/gateway/server/ws-connection.ts @@ -16,6 +16,10 @@ import { resolveHostedPluginSurfaceUrl } from "../hosted-plugin-surface-url.js"; import type { GatewayMethodRegistry } from "../methods/registry.js"; import { isLoopbackAddress } from "../net.js"; import type { PluginNodeCapabilitySurface } from "../plugin-node-capability.js"; +import { + GATEWAY_STARTUP_CLOSE_CODE, + GATEWAY_STARTUP_PENDING_CLOSE_CAUSE, +} from "../protocol/startup-unavailable.js"; import { MAX_PAYLOAD_BYTES, MAX_PREAUTH_PAYLOAD_BYTES } from "../server-constants.js"; import { clearNodeWakeState } from "../server-methods/nodes-wake-state.js"; import type { GatewayRequestContext, GatewayRequestHandlers } from "../server-methods/types.js"; @@ -377,9 +381,12 @@ export function attachGatewayWsConnectionHandler(params: AttachGatewayWsConnecti ...closeMeta, }; if (!client) { - const logFn = isNoisySwiftPmHelperClose(requestUserAgent, remoteAddr) - ? logWsControl.debug - : logWsControl.warn; + const isExpectedStartupRetryClose = + closeCause === GATEWAY_STARTUP_PENDING_CLOSE_CAUSE && code === GATEWAY_STARTUP_CLOSE_CODE; + const logFn = + isNoisySwiftPmHelperClose(requestUserAgent, remoteAddr) || isExpectedStartupRetryClose + ? logWsControl.debug + : logWsControl.warn; logFn( `closed before connect conn=${connId} peer=${endpoint ?? "n/a"} remote=${remoteAddr ?? "?"} fwd=${logForwardedFor || "n/a"} origin=${logOrigin || "n/a"} host=${logHost || "n/a"} ua=${logUserAgent || "n/a"} code=${code ?? "n/a"} reason=${logReason || "n/a"}`, closeContext, diff --git a/src/gateway/server/ws-connection/message-handler.ts b/src/gateway/server/ws-connection/message-handler.ts index e56b3c2c0a9..13b12915bb6 100644 --- a/src/gateway/server/ws-connection/message-handler.ts +++ b/src/gateway/server/ws-connection/message-handler.ts @@ -99,6 +99,9 @@ import { } from "../../protocol/index.js"; import { gatewayStartupUnavailableDetails, + GATEWAY_STARTUP_CLOSE_CODE, + GATEWAY_STARTUP_CLOSE_REASON, + GATEWAY_STARTUP_PENDING_CLOSE_CAUSE, GATEWAY_STARTUP_RETRY_AFTER_MS, } from "../../protocol/startup-unavailable.js"; import { parseGatewayRole } from "../../role-policy.js"; @@ -514,7 +517,7 @@ export function attachGatewayWsMessageHandler(params: GatewayWsMessageHandlerPar }; if (isStartupPending?.()) { - markHandshakeFailure("startup-sidecars-pending"); + markHandshakeFailure(GATEWAY_STARTUP_PENDING_CLOSE_CAUSE); await sendFrame({ type: "res", id: frame.id, @@ -525,7 +528,7 @@ export function attachGatewayWsMessageHandler(params: GatewayWsMessageHandlerPar details: gatewayStartupUnavailableDetails(), }), }).catch(() => {}); - queueMicrotask(() => close(1013, "gateway starting")); + queueMicrotask(() => close(GATEWAY_STARTUP_CLOSE_CODE, GATEWAY_STARTUP_CLOSE_REASON)); return; }