diff --git a/CHANGELOG.md b/CHANGELOG.md index 8239bc1b347..76b0789ffc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai - Plugins/runtime-deps: cache unchanged bundled runtime mirror dist-file materialization decisions and close file-lock handles on owner-write failures, reducing repeated startup chunk scans and avoiding FileHandle-GC recovery stalls. Refs #73532. Thanks @oadiazp and @bstanbury. - CLI/TUI: keep `chat.history` off model-catalog discovery so initial Gateway-backed TUI history loads cannot block behind slow provider/plugin model scans on low-core hosts. Refs #73524. Thanks @harshcatsystems-collab. - Channels/WhatsApp: flag recently reconnected linked accounts in channel status even when the socket is currently healthy, so flapping WhatsApp Web sessions no longer look clean after a brief reconnect. Refs #73602. Thanks @Vksh07. +- Gateway: expose `gateway.handshakeTimeoutMs` in config, schema, and docs while preserving `OPENCLAW_HANDSHAKE_TIMEOUT_MS` precedence, so loaded or low-powered hosts can tune local WebSocket pre-auth handshakes without patching dist files. Supersedes #51282; refs #73592 and #73652. Thanks @henry-the-frog. - Agents/model selection: resolve slash-form aliases before provider/model parsing and keep alias-resolved primary models subject to transient provider cooldowns, so cron and persisted sessions do not retry cooled-down raw aliases. Fixes #73573 and #73657. Thanks @akai-shuuichi and @hashslingers. - Agents/Claude CLI: reuse already-cached macOS Keychain credentials for no-prompt Claude credential reads, so doctor/runtime checks do not miss fresh interactive Claude auth. Fixes #73682. Thanks @RyanSandoval. - Agents/transcripts: strip empty assistant text blocks while preserving valid text, images, and signatures, so Anthropic-style providers no longer reject sanitized transcript turns. Fixes #73640. Thanks @jowhee327. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index f10c8561ddf..0afdb43e9fa 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -1265c4249f2740b6786b295d5a88391ba7eb0c30bdf460c60dfb4dfcb4153685 config-baseline.json -805bd3f63ff7327da45c01b78dbc990ed53bd13b89e0cbf50f319aa99334ba92 config-baseline.core.json +d4c98bce7b547349b9cbbe08ec1018eafce9900502d7794df993d07fdec0e2e0 config-baseline.json +6ce74b2ab3544e5375009a435a2360a3095e6bd759bb7dd8114293fb8a0e2b25 config-baseline.core.json 0e38bad86bdc96c38573f6d51ac9e6fc5306cc20fb4a454399c57c105a61ba87 config-baseline.channel.json 0dd6583fafae6c9134e46c4cf9bddee9822d6436436dcb1a6dcba6d012962e51 config-baseline.plugin.json diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index f0112b69be8..a687a308a81 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -441,6 +441,7 @@ See [Plugins](/tools/plugin). - Relay-backed registrations are delegated to a specific gateway identity. The paired iOS app fetches `gateway.identity.get`, includes that identity in the relay registration, and forwards a registration-scoped send grant to the gateway. Another gateway cannot reuse that stored registration. - `OPENCLAW_APNS_RELAY_BASE_URL` / `OPENCLAW_APNS_RELAY_TIMEOUT_MS`: temporary env overrides for the relay config above. - `OPENCLAW_APNS_RELAY_ALLOW_HTTP=true`: development-only escape hatch for loopback HTTP relay URLs. Production relay URLs should stay on HTTPS. +- `gateway.handshakeTimeoutMs`: pre-auth Gateway WebSocket handshake timeout in milliseconds. Default: `15000`. `OPENCLAW_HANDSHAKE_TIMEOUT_MS` takes precedence when set. Increase this on loaded or low-powered hosts where local clients can connect while startup warmup is still settling. - `gateway.channelHealthCheckMinutes`: channel health-monitor interval in minutes. Set `0` to disable health-monitor restarts globally. Default: `5`. - `gateway.channelStaleEventThresholdMinutes`: stale-socket threshold in minutes. Keep this greater than or equal to `gateway.channelHealthCheckMinutes`. Default: `30`. - `gateway.channelMaxRestartsPerHour`: maximum health-monitor restarts per channel/account in a rolling hour. Default: `10`. diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index d5f2ae049e5..974961cb76c 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -270,6 +270,24 @@ cannot roll back unrelated user settings. + + Give local clients more time to complete the pre-auth WebSocket handshake on + loaded or low-powered hosts: + + ```json5 + { + gateway: { + handshakeTimeoutMs: 30000, + }, + } + ``` + + - Default is `15000` milliseconds. + - `OPENCLAW_HANDSHAKE_TIMEOUT_MS` still takes precedence for one-off service or shell overrides. + - Prefer fixing startup/event-loop stalls first; this knob is for hosts that are healthy but slow during warmup. + + + Sessions control conversation continuity and isolation: diff --git a/src/config/config-misc.test.ts b/src/config/config-misc.test.ts index 4d33ee6abf8..7e1f1797f59 100644 --- a/src/config/config-misc.test.ts +++ b/src/config/config-misc.test.ts @@ -409,6 +409,27 @@ describe("gateway.tools config", () => { }); describe("gateway.channelHealthCheckMinutes", () => { + it("accepts preauth handshake timeout tuning", () => { + const res = validateConfigObject({ + gateway: { + handshakeTimeoutMs: 30_000, + }, + }); + expect(res.ok).toBe(true); + }); + + it("rejects non-positive preauth handshake timeouts", () => { + const res = validateConfigObject({ + gateway: { + handshakeTimeoutMs: 0, + }, + }); + expect(res.ok).toBe(false); + if (!res.ok) { + expect(res.issues[0]?.path).toBe("gateway.handshakeTimeoutMs"); + } + }); + it("accepts zero to disable monitor", () => { const res = validateConfigObject({ gateway: { diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 9f101633086..2459f2ebfe0 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -22343,6 +22343,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { }, additionalProperties: false, }, + handshakeTimeoutMs: { + type: "integer", + minimum: 1, + maximum: 9007199254740991, + title: "Gateway Handshake Timeout", + description: + "Pre-auth Gateway WebSocket handshake timeout in milliseconds. Use higher values on loaded or low-powered hosts where local clients can connect during startup warmup. OPENCLAW_HANDSHAKE_TIMEOUT_MS still takes precedence.", + }, channelHealthCheckMinutes: { type: "integer", minimum: 0, @@ -24645,6 +24653,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { help: "Explicit gateway-level tool denylist to block risky tools even if lower-level policies allow them. Use deny rules for emergency response and defense-in-depth hardening.", tags: ["access", "network"], }, + "gateway.handshakeTimeoutMs": { + label: "Gateway Handshake Timeout", + help: "Pre-auth Gateway WebSocket handshake timeout in milliseconds. Use higher values on loaded or low-powered hosts where local clients can connect during startup warmup. OPENCLAW_HANDSHAKE_TIMEOUT_MS still takes precedence.", + tags: ["network", "performance"], + }, "gateway.channelHealthCheckMinutes": { label: "Gateway Channel Health Check Interval (min)", help: "Interval in minutes for automatic channel health probing and status updates. Use lower intervals for faster detection, or higher intervals to reduce periodic probe noise.", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index be6a7e81ca4..09bc54cac22 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -95,6 +95,8 @@ export const FIELD_HELP: Record = { "Explicit gateway-level tool allowlist when you want a narrow set of tools available at runtime. Use this for locked-down environments where tool scope must be tightly controlled.", "gateway.tools.deny": "Explicit gateway-level tool denylist to block risky tools even if lower-level policies allow them. Use deny rules for emergency response and defense-in-depth hardening.", + "gateway.handshakeTimeoutMs": + "Pre-auth Gateway WebSocket handshake timeout in milliseconds. Use higher values on loaded or low-powered hosts where local clients can connect during startup warmup. OPENCLAW_HANDSHAKE_TIMEOUT_MS still takes precedence.", "gateway.channelHealthCheckMinutes": "Interval in minutes for automatic channel health probing and status updates. Use lower intervals for faster detection, or higher intervals to reduce periodic probe noise.", "gateway.channelStaleEventThresholdMinutes": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 381a349b521..cd432a480e1 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -119,6 +119,7 @@ export const FIELD_LABELS: Record = { "gateway.tools": "Gateway Tool Exposure Policy", "gateway.tools.allow": "Gateway Tool Allowlist", "gateway.tools.deny": "Gateway Tool Denylist", + "gateway.handshakeTimeoutMs": "Gateway Handshake Timeout", "gateway.channelHealthCheckMinutes": "Gateway Channel Health Check Interval (min)", "gateway.channelStaleEventThresholdMinutes": "Gateway Channel Stale Event Threshold (min)", "gateway.channelMaxRestartsPerHour": "Gateway Channel Max Restarts Per Hour", diff --git a/src/config/types.gateway.ts b/src/config/types.gateway.ts index 088d031cc6e..5535d0e2e6d 100644 --- a/src/config/types.gateway.ts +++ b/src/config/types.gateway.ts @@ -453,6 +453,11 @@ export type GatewayConfig = { tools?: GatewayToolsConfig; /** WebChat display/history settings. */ webchat?: GatewayWebchatConfig; + /** + * Pre-auth Gateway WebSocket handshake timeout in milliseconds. + * Env var OPENCLAW_HANDSHAKE_TIMEOUT_MS takes precedence. Default: 15000. + */ + handshakeTimeoutMs?: number; /** * Channel health monitor interval in minutes. * Periodically checks channel health and restarts unhealthy channels. diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index 5581c772831..1249e4872db 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -797,6 +797,7 @@ export const OpenClawSchema = z }) .strict() .optional(), + handshakeTimeoutMs: z.number().int().min(1).optional(), channelHealthCheckMinutes: z.number().int().min(0).optional(), channelStaleEventThresholdMinutes: z.number().int().min(1).optional(), channelMaxRestartsPerHour: z.number().int().min(1).optional(), diff --git a/src/gateway/handshake-timeouts.test.ts b/src/gateway/handshake-timeouts.test.ts index 01530bb22a0..c3e2f01b600 100644 --- a/src/gateway/handshake-timeouts.test.ts +++ b/src/gateway/handshake-timeouts.test.ts @@ -7,6 +7,7 @@ import { MAX_CONNECT_CHALLENGE_TIMEOUT_MS, MIN_CONNECT_CHALLENGE_TIMEOUT_MS, resolveConnectChallengeTimeoutMs, + resolvePreauthHandshakeTimeoutMs, } from "./handshake-timeouts.js"; describe("gateway handshake timeouts", () => { @@ -36,6 +37,39 @@ describe("gateway handshake timeouts", () => { ).toBe(20); }); + test("resolves preauth handshake timeout with env over config over default", () => { + expect( + resolvePreauthHandshakeTimeoutMs({ + env: { OPENCLAW_HANDSHAKE_TIMEOUT_MS: "75000" }, + configuredTimeoutMs: 30_000, + }), + ).toBe(75_000); + expect( + resolvePreauthHandshakeTimeoutMs({ + env: {}, + configuredTimeoutMs: 30_000, + }), + ).toBe(30_000); + expect( + resolvePreauthHandshakeTimeoutMs({ + env: { OPENCLAW_HANDSHAKE_TIMEOUT_MS: "garbage" }, + configuredTimeoutMs: 30_000, + }), + ).toBe(30_000); + expect(resolvePreauthHandshakeTimeoutMs({ env: {} })).toBe( + DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS, + ); + }); + + test("resolves preauth handshake timeout from the test-only env before config", () => { + expect( + resolvePreauthHandshakeTimeoutMs({ + env: { VITEST: "1", OPENCLAW_TEST_HANDSHAKE_TIMEOUT_MS: "50" }, + configuredTimeoutMs: 30_000, + }), + ).toBe(50); + }); + test("ignores invalid handshake timeout overrides and falls back safely", () => { expect( getPreauthHandshakeTimeoutMsFromEnv({ diff --git a/src/gateway/handshake-timeouts.ts b/src/gateway/handshake-timeouts.ts index 73e8ad5714a..f01c2514cfc 100644 --- a/src/gateway/handshake-timeouts.ts +++ b/src/gateway/handshake-timeouts.ts @@ -44,3 +44,23 @@ export function getPreauthHandshakeTimeoutMsFromEnv(env: NodeJS.ProcessEnv = pro } return DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS; } + +export function resolvePreauthHandshakeTimeoutMs(params?: { + env?: NodeJS.ProcessEnv; + configuredTimeoutMs?: number | null; +}): number { + const env = params?.env ?? process.env; + const configuredTimeout = + env.OPENCLAW_HANDSHAKE_TIMEOUT_MS || (env.VITEST && env.OPENCLAW_TEST_HANDSHAKE_TIMEOUT_MS); + if (configuredTimeout) { + const parsed = Number(configuredTimeout); + if (Number.isFinite(parsed) && parsed > 0) { + return parsed; + } + } + const configured = params?.configuredTimeoutMs; + if (typeof configured === "number" && Number.isFinite(configured) && configured > 0) { + return configured; + } + return DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS; +} diff --git a/src/gateway/server-ws-runtime.ts b/src/gateway/server-ws-runtime.ts index 92478e8469b..e46d84bb624 100644 --- a/src/gateway/server-ws-runtime.ts +++ b/src/gateway/server-ws-runtime.ts @@ -35,6 +35,7 @@ export function attachGatewayWsHandlers(params: GatewayWsRuntimeParams) { getRequiredSharedGatewaySessionGeneration: params.getRequiredSharedGatewaySessionGeneration, rateLimiter: params.rateLimiter, browserRateLimiter: params.browserRateLimiter, + preauthHandshakeTimeoutMs: params.preauthHandshakeTimeoutMs, gatewayMethods: params.gatewayMethods, events: params.events, refreshHealthSnapshot: params.context.refreshHealthSnapshot, diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index 218ef48991c..f4961fd9520 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -535,6 +535,8 @@ export async function startGatewayServer( current: resolveCurrentSharedGatewaySessionGeneration(), required: null, }; + const preauthHandshakeTimeoutMs = + cfgAtStart.gateway?.handshakeTimeoutMs ?? getRuntimeConfig().gateway?.handshakeTimeoutMs; const initialHooksConfig = runtimeConfig.hooksConfig; const initialHookClientIpConfig = resolveHookClientIpConfig(cfgAtStart); const canvasHostEnabled = runtimeConfig.canvasHostEnabled; @@ -939,6 +941,7 @@ export async function startGatewayServer( getRequiredSharedGatewaySessionGeneration(sharedGatewaySessionGenerationState), rateLimiter: authRateLimiter, browserRateLimiter: browserAuthRateLimiter, + preauthHandshakeTimeoutMs, gatewayMethods: runtimeState.gatewayMethods, events: GATEWAY_EVENTS, logGateway: log, diff --git a/src/gateway/server.preauth-hardening.test.ts b/src/gateway/server.preauth-hardening.test.ts index 6ad2dbaab98..cd743b13523 100644 --- a/src/gateway/server.preauth-hardening.test.ts +++ b/src/gateway/server.preauth-hardening.test.ts @@ -1,3 +1,4 @@ +import { writeFile } from "node:fs/promises"; import http from "node:http"; import { afterEach, describe, expect, it } from "vitest"; import { WebSocketServer } from "ws"; @@ -151,6 +152,48 @@ describe("gateway pre-auth hardening", () => { } }); + it("uses gateway.handshakeTimeoutMs for idle unauthenticated sockets", async () => { + const configPath = process.env.OPENCLAW_CONFIG_PATH; + if (!configPath) { + throw new Error("OPENCLAW_CONFIG_PATH missing in gateway preauth test"); + } + await writeFile( + configPath, + JSON.stringify( + { + gateway: { + handshakeTimeoutMs: 250, + }, + }, + null, + 2, + ), + "utf-8", + ); + try { + const harness = await createGatewaySuiteHarness({ + serverOptions: { auth: { mode: "none" } }, + }); + try { + const ws = await harness.openWs(); + await readConnectChallengeNonce(ws); + const close = await new Promise<{ code: number; elapsedMs: number }>((resolve) => { + const startedAt = Date.now(); + ws.once("close", (code) => { + resolve({ code, elapsedMs: Date.now() - startedAt }); + }); + }); + expect(close.code).toBe(1000); + expect(close.elapsedMs).toBeGreaterThan(0); + expect(close.elapsedMs).toBeLessThan(PREAUTH_HANDSHAKE_TEST_CLOSE_LIMIT_MS); + } finally { + await harness.close(); + } + } finally { + await writeFile(configPath, "{}\n", "utf-8"); + } + }); + it("rejects oversized pre-auth connect frames before application-level auth responses", async () => { resetDiagnosticEventsForTest(); const events: DiagnosticEventPayload[] = []; @@ -227,7 +270,9 @@ describe("gateway pre-auth hardening", () => { }); req.once("response", (res) => { res.resume(); - resolve(res.statusCode ?? 0); + res.once("end", () => { + resolve(res.statusCode ?? 0); + }); }); req.once("error", reject); req.end(); diff --git a/src/gateway/server/ws-connection.ts b/src/gateway/server/ws-connection.ts index a1b8b8fafc6..9b04999c132 100644 --- a/src/gateway/server/ws-connection.ts +++ b/src/gateway/server/ws-connection.ts @@ -11,7 +11,7 @@ import { truncateUtf16Safe } from "../../utils.js"; import { isWebchatClient } from "../../utils/message-channel.js"; import type { AuthRateLimiter } from "../auth-rate-limit.js"; import type { ResolvedGatewayAuth } from "../auth.js"; -import { getPreauthHandshakeTimeoutMsFromEnv } from "../handshake-timeouts.js"; +import { resolvePreauthHandshakeTimeoutMs } from "../handshake-timeouts.js"; import { isLoopbackAddress } from "../net.js"; import { MAX_PAYLOAD_BYTES, MAX_PREAUTH_PAYLOAD_BYTES } from "../server-constants.js"; import { clearNodeWakeState } from "../server-methods/nodes-wake-state.js"; @@ -131,6 +131,7 @@ export type GatewayWsSharedHandlerParams = { rateLimiter?: AuthRateLimiter; /** Browser-origin fallback limiter (loopback is never exempt). */ browserRateLimiter?: AuthRateLimiter; + preauthHandshakeTimeoutMs?: number; gatewayMethods: string[]; events: string[]; refreshHealthSnapshot: GatewayRequestContext["refreshHealthSnapshot"]; @@ -365,7 +366,9 @@ export function attachGatewayWsConnectionHandler(params: AttachGatewayWsConnecti close(); }); - const handshakeTimeoutMs = getPreauthHandshakeTimeoutMsFromEnv(); + const handshakeTimeoutMs = resolvePreauthHandshakeTimeoutMs({ + configuredTimeoutMs: params.preauthHandshakeTimeoutMs, + }); const handshakeTimer = setTimeout(() => { if (!client) { handshakeState = "failed"; diff --git a/src/gateway/voiceclaw-realtime/session.ts b/src/gateway/voiceclaw-realtime/session.ts index 3c8d769182b..3823bc34270 100644 --- a/src/gateway/voiceclaw-realtime/session.ts +++ b/src/gateway/voiceclaw-realtime/session.ts @@ -10,7 +10,7 @@ import { type GatewayAuthResult, type ResolvedGatewayAuth, } from "../auth.js"; -import { getPreauthHandshakeTimeoutMsFromEnv } from "../handshake-timeouts.js"; +import { resolvePreauthHandshakeTimeoutMs } from "../handshake-timeouts.js"; import { VoiceClawGeminiLiveAdapter } from "./gemini-live.js"; import { createVoiceClawRealtimeToolRuntime, @@ -70,12 +70,17 @@ export class VoiceClawRealtimeSession { } attach(): void { - this.handshakeTimer = setTimeout(() => { - if (!this.config && !this.closed) { - log.warn(`session ${this.id} handshake timed out`); - this.ws.close(1000, "handshake timeout"); - } - }, getPreauthHandshakeTimeoutMsFromEnv()); + this.handshakeTimer = setTimeout( + () => { + if (!this.config && !this.closed) { + log.warn(`session ${this.id} handshake timed out`); + this.ws.close(1000, "handshake timeout"); + } + }, + resolvePreauthHandshakeTimeoutMs({ + configuredTimeoutMs: this.gatewayConfig.gateway?.handshakeTimeoutMs, + }), + ); this.ws.on("message", (raw) => { void this.handleRawMessage(raw).catch((err) => { diff --git a/src/gateway/voiceclaw-realtime/upgrade.test.ts b/src/gateway/voiceclaw-realtime/upgrade.test.ts index a63cded99cc..b32bec7b469 100644 --- a/src/gateway/voiceclaw-realtime/upgrade.test.ts +++ b/src/gateway/voiceclaw-realtime/upgrade.test.ts @@ -69,12 +69,34 @@ describe("VoiceClaw realtime gateway upgrade", () => { } }); }); + + it("uses gateway.handshakeTimeoutMs for idle realtime sockets", async () => { + await withRealtimeGateway( + async ({ port }) => { + const ws = new WebSocket(`ws://127.0.0.1:${port}${VOICECLAW_REALTIME_PATH}`); + + try { + await waitForOpen(ws); + await expect(waitForClose(ws)).resolves.toMatchObject({ + code: 1000, + reason: "handshake timeout", + }); + } finally { + await closeWebSocket(ws); + } + }, + { gateway: { auth: { mode: "none" }, handshakeTimeoutMs: 60 } }, + ); + }); }); -async function withRealtimeGateway(run: (params: { port: number }) => Promise) { +async function withRealtimeGateway( + run: (params: { port: number }) => Promise, + cfg: Record = { gateway: { auth: { mode: "none" } } }, +) { const resolvedAuth: ResolvedGatewayAuth = { mode: "none", allowTailscale: false }; await withTempConfig({ - cfg: { gateway: { auth: { mode: "none" } } }, + cfg, run: async () => { const clients = new Set(); const httpServer = createGatewayHttpServer({