diff --git a/CHANGELOG.md b/CHANGELOG.md index ab14e0fefef..855dfcf4aaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,6 +78,7 @@ Docs: https://docs.openclaw.ai - ACP/sessions_spawn: implicitly stream `mode="run"` ACP spawns to parent only for eligible subagent orchestrator sessions (heartbeat `target: "last"` with a usable session-local route), restoring parent progress relays without thread binding. (#42404) Thanks @davidguttman. - Sessions/reset model recompute: clear stale runtime model, context-token, and system-prompt metadata before session resets recompute the replacement session, so resets pick up current defaults and explicit overrides instead of reusing old runtime model state. (#41173) thanks @PonyX-lab. - Browser/Browserbase 429 handling: surface stable no-retry rate-limit guidance without buffering discarded HTTP 429 response bodies from remote browser services. (#40491) thanks @mvanhorn. +- Gateway/auth: allow one trusted device-token retry on shared-token mismatch with recovery hints to prevent reconnect churn during token drift. (#42507) Thanks @joshavant. 
## 2026.3.8 diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift index 3dc5eacee6e..f822e32044e 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift @@ -131,6 +131,41 @@ private let defaultOperatorConnectScopes: [String] = [ "operator.pairing", ] +private enum GatewayConnectErrorCodes { + static let authTokenMismatch = "AUTH_TOKEN_MISMATCH" + static let authDeviceTokenMismatch = "AUTH_DEVICE_TOKEN_MISMATCH" + static let authTokenMissing = "AUTH_TOKEN_MISSING" + static let authPasswordMissing = "AUTH_PASSWORD_MISSING" + static let authPasswordMismatch = "AUTH_PASSWORD_MISMATCH" + static let authRateLimited = "AUTH_RATE_LIMITED" + static let pairingRequired = "PAIRING_REQUIRED" + static let controlUiDeviceIdentityRequired = "CONTROL_UI_DEVICE_IDENTITY_REQUIRED" + static let deviceIdentityRequired = "DEVICE_IDENTITY_REQUIRED" +} + +private struct GatewayConnectAuthError: LocalizedError { + let message: String + let detailCode: String? + let canRetryWithDeviceToken: Bool + + var errorDescription: String? { self.message } + + var isNonRecoverable: Bool { + switch self.detailCode { + case GatewayConnectErrorCodes.authTokenMissing, + GatewayConnectErrorCodes.authPasswordMissing, + GatewayConnectErrorCodes.authPasswordMismatch, + GatewayConnectErrorCodes.authRateLimited, + GatewayConnectErrorCodes.pairingRequired, + GatewayConnectErrorCodes.controlUiDeviceIdentityRequired, + GatewayConnectErrorCodes.deviceIdentityRequired: + return true + default: + return false + } + } +} + public actor GatewayChannelActor { private let logger = Logger(subsystem: "ai.openclaw", category: "gateway") private var task: WebSocketTaskBox? @@ -160,6 +195,9 @@ public actor GatewayChannelActor { private var watchdogTask: Task? private var tickTask: Task? private var keepaliveTask: Task? 
+ private var pendingDeviceTokenRetry = false + private var deviceTokenRetryBudgetUsed = false + private var reconnectPausedForAuthFailure = false private let defaultRequestTimeoutMs: Double = 15000 private let pushHandler: (@Sendable (GatewayPush) async -> Void)? private let connectOptions: GatewayConnectOptions? @@ -232,10 +270,19 @@ public actor GatewayChannelActor { while self.shouldReconnect { guard await self.sleepUnlessCancelled(nanoseconds: 30 * 1_000_000_000) else { return } // 30s cadence guard self.shouldReconnect else { return } + if self.reconnectPausedForAuthFailure { continue } if self.connected { continue } do { try await self.connect() } catch { + if self.shouldPauseReconnectAfterAuthFailure(error) { + self.reconnectPausedForAuthFailure = true + self.logger.error( + "gateway watchdog reconnect paused for non-recoverable auth failure " + + "\(error.localizedDescription, privacy: .public)" + ) + continue + } let wrapped = self.wrap(error, context: "gateway watchdog reconnect") self.logger.error("gateway watchdog reconnect failed \(wrapped.localizedDescription, privacy: .public)") } @@ -267,7 +314,12 @@ public actor GatewayChannelActor { }, operation: { try await self.sendConnect() }) } catch { - let wrapped = self.wrap(error, context: "connect to gateway @ \(self.url.absoluteString)") + let wrapped: Error + if let authError = error as? GatewayConnectAuthError { + wrapped = authError + } else { + wrapped = self.wrap(error, context: "connect to gateway @ \(self.url.absoluteString)") + } self.connected = false self.task?.cancel(with: .goingAway, reason: nil) await self.disconnectHandler?("connect failed: \(wrapped.localizedDescription)") @@ -281,6 +333,7 @@ public actor GatewayChannelActor { } self.listen() self.connected = true + self.reconnectPausedForAuthFailure = false self.backoffMs = 500 self.lastSeq = nil self.startKeepalive() @@ -371,11 +424,18 @@ public actor GatewayChannelActor { (includeDeviceIdentity && identity != nil) ? 
DeviceAuthStore.loadToken(deviceId: identity!.deviceId, role: role)?.token : nil - // If we're not sending a device identity, a device token can't be validated server-side. - // In that mode we always use the shared gateway token/password. - let authToken = includeDeviceIdentity ? (storedToken ?? self.token) : self.token + let shouldUseDeviceRetryToken = + includeDeviceIdentity && self.pendingDeviceTokenRetry && + storedToken != nil && self.token != nil && self.isTrustedDeviceRetryEndpoint() + if shouldUseDeviceRetryToken { + self.pendingDeviceTokenRetry = false + } + // Keep shared credentials explicit when provided. Device token retry is attached + // only on a bounded second attempt after token mismatch. + let authToken = self.token ?? (includeDeviceIdentity ? storedToken : nil) + let authDeviceToken = shouldUseDeviceRetryToken ? storedToken : nil let authSource: GatewayAuthSource - if storedToken != nil { + if authDeviceToken != nil || (self.token == nil && storedToken != nil) { authSource = .deviceToken } else if authToken != nil { authSource = .sharedToken @@ -386,9 +446,12 @@ public actor GatewayChannelActor { } self.lastAuthSource = authSource self.logger.info("gateway connect auth=\(authSource.rawValue, privacy: .public)") - let canFallbackToShared = includeDeviceIdentity && storedToken != nil && self.token != nil if let authToken { - params["auth"] = ProtoAnyCodable(["token": ProtoAnyCodable(authToken)]) + var auth: [String: ProtoAnyCodable] = ["token": ProtoAnyCodable(authToken)] + if let authDeviceToken { + auth["deviceToken"] = ProtoAnyCodable(authDeviceToken) + } + params["auth"] = ProtoAnyCodable(auth) } else if let password = self.password { params["auth"] = ProtoAnyCodable(["password": ProtoAnyCodable(password)]) } @@ -426,11 +489,24 @@ public actor GatewayChannelActor { do { let response = try await self.waitForConnectResponse(reqId: reqId) try await self.handleConnectResponse(response, identity: identity, role: role) + 
self.pendingDeviceTokenRetry = false + self.deviceTokenRetryBudgetUsed = false } catch { - if canFallbackToShared { - if let identity { - DeviceAuthStore.clearToken(deviceId: identity.deviceId, role: role) - } + let shouldRetryWithDeviceToken = self.shouldRetryWithStoredDeviceToken( + error: error, + explicitGatewayToken: self.token, + storedToken: storedToken, + attemptedDeviceTokenRetry: authDeviceToken != nil) + if shouldRetryWithDeviceToken { + self.pendingDeviceTokenRetry = true + self.deviceTokenRetryBudgetUsed = true + self.backoffMs = min(self.backoffMs, 250) + } else if authDeviceToken != nil, + let identity, + self.shouldClearStoredDeviceTokenAfterRetry(error) + { + // Retry failed with an explicit device-token mismatch; clear stale local token. + DeviceAuthStore.clearToken(deviceId: identity.deviceId, role: role) } throw error } @@ -443,7 +519,13 @@ public actor GatewayChannelActor { ) async throws { if res.ok == false { let msg = (res.error?["message"]?.value as? String) ?? "gateway connect failed" - throw NSError(domain: "Gateway", code: 1008, userInfo: [NSLocalizedDescriptionKey: msg]) + let details = res.error?["details"]?.value as? [String: ProtoAnyCodable] + let detailCode = details?["code"]?.value as? String + let canRetryWithDeviceToken = details?["canRetryWithDeviceToken"]?.value as? Bool ?? 
false + throw GatewayConnectAuthError( + message: msg, + detailCode: detailCode, + canRetryWithDeviceToken: canRetryWithDeviceToken) } guard let payload = res.payload else { throw NSError( @@ -616,19 +698,91 @@ public actor GatewayChannelActor { private func scheduleReconnect() async { guard self.shouldReconnect else { return } + guard !self.reconnectPausedForAuthFailure else { return } let delay = self.backoffMs / 1000 self.backoffMs = min(self.backoffMs * 2, 30000) guard await self.sleepUnlessCancelled(nanoseconds: UInt64(delay * 1_000_000_000)) else { return } guard self.shouldReconnect else { return } + guard !self.reconnectPausedForAuthFailure else { return } do { try await self.connect() } catch { + if self.shouldPauseReconnectAfterAuthFailure(error) { + self.reconnectPausedForAuthFailure = true + self.logger.error( + "gateway reconnect paused for non-recoverable auth failure " + + "\(error.localizedDescription, privacy: .public)" + ) + return + } let wrapped = self.wrap(error, context: "gateway reconnect") self.logger.error("gateway reconnect failed \(wrapped.localizedDescription, privacy: .public)") await self.scheduleReconnect() } } + private func shouldRetryWithStoredDeviceToken( + error: Error, + explicitGatewayToken: String?, + storedToken: String?, + attemptedDeviceTokenRetry: Bool + ) -> Bool { + if self.deviceTokenRetryBudgetUsed { + return false + } + if attemptedDeviceTokenRetry { + return false + } + guard explicitGatewayToken != nil, storedToken != nil else { + return false + } + guard self.isTrustedDeviceRetryEndpoint() else { + return false + } + guard let authError = error as? GatewayConnectAuthError else { + return false + } + return authError.canRetryWithDeviceToken || + authError.detailCode == GatewayConnectErrorCodes.authTokenMismatch + } + + private func shouldPauseReconnectAfterAuthFailure(_ error: Error) -> Bool { + guard let authError = error as? 
GatewayConnectAuthError else { + return false + } + if authError.isNonRecoverable { + return true + } + if authError.detailCode == GatewayConnectErrorCodes.authTokenMismatch && + self.deviceTokenRetryBudgetUsed && !self.pendingDeviceTokenRetry + { + return true + } + return false + } + + private func shouldClearStoredDeviceTokenAfterRetry(_ error: Error) -> Bool { + guard let authError = error as? GatewayConnectAuthError else { + return false + } + return authError.detailCode == GatewayConnectErrorCodes.authDeviceTokenMismatch + } + + private func isTrustedDeviceRetryEndpoint() -> Bool { + // This client currently treats loopback as the only trusted retry target. + // Unlike the Node gateway client, it does not yet expose a pinned TLS-fingerprint + // trust path for remote retry, so remote fallback remains disabled by default. + guard let host = self.url.host?.trimmingCharacters(in: .whitespacesAndNewlines).lowercased(), + !host.isEmpty + else { + return false + } + if host == "localhost" || host == "::1" || host == "127.0.0.1" || host.hasPrefix("127.") { + return true + } + return false + } + private nonisolated func sleepUnlessCancelled(nanoseconds: UInt64) async -> Bool { do { try await Task.sleep(nanoseconds: nanoseconds) @@ -756,7 +910,8 @@ public actor GatewayChannelActor { return (id: id, data: data) } catch { self.logger.error( - "gateway \(kind) encode failed \(method, privacy: .public) error=\(error.localizedDescription, privacy: .public)") + "gateway \(kind) encode failed \(method, privacy: .public) " + + "error=\(error.localizedDescription, privacy: .public)") throw error } } diff --git a/docs/cli/devices.md b/docs/cli/devices.md index be01e3cc0d5..f73f30dfa1d 100644 --- a/docs/cli/devices.md +++ b/docs/cli/devices.md @@ -92,3 +92,40 @@ Pass `--token` or `--password` explicitly. Missing explicit credentials is an er - These commands require `operator.pairing` (or `operator.admin`) scope. - `devices clear` is intentionally gated by `--yes`. 
- If pairing scope is unavailable on local loopback (and no explicit `--url` is passed), list/approve can use a local pairing fallback. + +## Token drift recovery checklist + +Use this when Control UI or other clients keep failing with `AUTH_TOKEN_MISMATCH` or `AUTH_DEVICE_TOKEN_MISMATCH`. + +1. Confirm current gateway token source: + +```bash +openclaw config get gateway.auth.token +``` + +2. List paired devices and identify the affected device id: + +```bash +openclaw devices list +``` + +3. Rotate operator token for the affected device: + +```bash +openclaw devices rotate --device <deviceId> --role operator +``` + +4. If rotation is not enough, remove stale pairing and approve again: + +```bash +openclaw devices remove <deviceId> +openclaw devices list +openclaw devices approve <requestId> +``` + +5. Retry client connection with the current shared token/password. + +Related: + +- [Dashboard auth troubleshooting](/web/dashboard#if-you-see-unauthorized-1008) +- [Gateway troubleshooting](/gateway/troubleshooting#dashboard-control-ui-connectivity) diff --git a/docs/gateway/protocol.md b/docs/gateway/protocol.md index 62a5adb1fef..9c886a31716 100644 --- a/docs/gateway/protocol.md +++ b/docs/gateway/protocol.md @@ -206,6 +206,12 @@ The Gateway treats these as **claims** and enforces server-side allowlists. persisted by the client for future connects. - Device tokens can be rotated/revoked via `device.token.rotate` and `device.token.revoke` (requires `operator.pairing` scope). +- Auth failures include `error.details.code` plus recovery hints: + - `error.details.canRetryWithDeviceToken` (boolean) + - `error.details.recommendedNextStep` (`retry_with_device_token`, `update_auth_configuration`, `update_auth_credentials`, `wait_then_retry`, `review_auth_configuration`) +- Client behavior for `AUTH_TOKEN_MISMATCH`: + - Trusted clients may attempt one bounded retry with a cached per-device token. + - If that retry fails, clients should stop automatic reconnect loops and surface operator action guidance. 
## Device identity + pairing @@ -217,8 +223,9 @@ The Gateway treats these as **claims** and enforces server-side allowlists. - **Local** connects include loopback and the gateway host’s own tailnet address (so same‑host tailnet binds can still auto‑approve). - All WS clients must include `device` identity during `connect` (operator + node). - Control UI can omit it **only** when `gateway.controlUi.dangerouslyDisableDeviceAuth` - is enabled for break-glass use. + Control UI can omit it only in these modes: + - `gateway.controlUi.allowInsecureAuth=true` for localhost-only insecure HTTP compatibility. + - `gateway.controlUi.dangerouslyDisableDeviceAuth=true` (break-glass, severe security downgrade). - All connections must sign the server-provided `connect.challenge` nonce. ### Device auth migration diagnostics diff --git a/docs/gateway/security/index.md b/docs/gateway/security/index.md index c62b77352e8..571f91cf405 100644 --- a/docs/gateway/security/index.md +++ b/docs/gateway/security/index.md @@ -262,9 +262,14 @@ High-signal `checkId` values you will most likely see in real deployments (not e ## Control UI over HTTP The Control UI needs a **secure context** (HTTPS or localhost) to generate device -identity. `gateway.controlUi.allowInsecureAuth` does **not** bypass secure-context, -device-identity, or device-pairing checks. Prefer HTTPS (Tailscale Serve) or open -the UI on `127.0.0.1`. +identity. `gateway.controlUi.allowInsecureAuth` is a local compatibility toggle: + +- On localhost, it allows Control UI auth without device identity when the page + is loaded over non-secure HTTP. +- It does not bypass pairing checks. +- It does not relax remote (non-localhost) device identity requirements. + +Prefer HTTPS (Tailscale Serve) or open the UI on `127.0.0.1`. For break-glass scenarios only, `gateway.controlUi.dangerouslyDisableDeviceAuth` disables device identity checks entirely. 
This is a severe security downgrade; diff --git a/docs/gateway/troubleshooting.md b/docs/gateway/troubleshooting.md index 46d2c58b966..ebea28a6541 100644 --- a/docs/gateway/troubleshooting.md +++ b/docs/gateway/troubleshooting.md @@ -113,9 +113,21 @@ Common signatures: challenge-based device auth flow (`connect.challenge` + `device.nonce`). - `device signature invalid` / `device signature expired` → client signed the wrong payload (or stale timestamp) for the current handshake. -- `unauthorized` / reconnect loop → token/password mismatch. +- `AUTH_TOKEN_MISMATCH` with `canRetryWithDeviceToken=true` → client can do one trusted retry with cached device token. +- repeated `unauthorized` after that retry → shared token/device token drift; refresh token config and re-approve/rotate device token if needed. - `gateway connect failed:` → wrong host/port/url target. +### Auth detail codes quick map + +Use `error.details.code` from the failed `connect` response to pick the next action: + +| Detail code | Meaning | Recommended action | +| ---------------------------- | -------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `AUTH_TOKEN_MISSING` | Client did not send a required shared token. | Paste/set token in the client and retry. For dashboard paths: `openclaw config get gateway.auth.token` then paste into Control UI settings. | +| `AUTH_TOKEN_MISMATCH` | Shared token did not match gateway auth token. | If `canRetryWithDeviceToken=true`, allow one trusted retry. If still failing, run the [token drift recovery checklist](/cli/devices#token-drift-recovery-checklist). | +| `AUTH_DEVICE_TOKEN_MISMATCH` | Cached per-device token is stale or revoked. | Rotate/re-approve device token using [devices CLI](/cli/devices), then reconnect. 
| +| `PAIRING_REQUIRED` | Device identity is known but not approved for this role. | Approve pending request: `openclaw devices list` then `openclaw devices approve <requestId>`. | + Device auth v2 migration check: ```bash @@ -135,6 +147,7 @@ Related: - [/web/control-ui](/web/control-ui) - [/gateway/authentication](/gateway/authentication) - [/gateway/remote](/gateway/remote) +- [/cli/devices](/cli/devices) ## Gateway service not running diff --git a/docs/help/faq.md b/docs/help/faq.md index a43e91f4396..a1d8724e125 100644 --- a/docs/help/faq.md +++ b/docs/help/faq.md @@ -2512,6 +2512,7 @@ Your gateway is running with auth enabled (`gateway.auth.*`), but the UI is not Facts (from code): - The Control UI keeps the token in `sessionStorage` for the current browser tab session and selected gateway URL, so same-tab refreshes keep working without restoring long-lived localStorage token persistence. +- On `AUTH_TOKEN_MISMATCH`, trusted clients can attempt one bounded retry with a cached device token when the gateway returns retry hints (`canRetryWithDeviceToken=true`, `recommendedNextStep=retry_with_device_token`). Fix: @@ -2520,6 +2521,9 @@ Fix: - If remote, tunnel first: `ssh -N -L 18789:127.0.0.1:18789 user@host` then open `http://127.0.0.1:18789/`. - Set `gateway.auth.token` (or `OPENCLAW_GATEWAY_TOKEN`) on the gateway host. - In the Control UI settings, paste the same token. +- If mismatch persists after the one retry, rotate/re-approve the paired device token: + - `openclaw devices list` + - `openclaw devices rotate --device <deviceId> --role operator` - Still stuck? Run `openclaw status --all` and follow [Troubleshooting](/gateway/troubleshooting). See [Dashboard](/web/dashboard) for auth details. 
### I set gatewaybind tailnet but it can't bind nothing listens diff --git a/docs/help/troubleshooting.md b/docs/help/troubleshooting.md index e051f77f589..951e1a480d7 100644 --- a/docs/help/troubleshooting.md +++ b/docs/help/troubleshooting.md @@ -136,7 +136,8 @@ flowchart TD Common log signatures: - `device identity required` → HTTP/non-secure context cannot complete device auth. - - `unauthorized` / reconnect loop → wrong token/password or auth mode mismatch. + - `AUTH_TOKEN_MISMATCH` with retry hints (`canRetryWithDeviceToken=true`) → one trusted device-token retry may occur automatically. + - repeated `unauthorized` after that retry → wrong token/password, auth mode mismatch, or stale paired device token. - `gateway connect failed:` → UI is targeting the wrong URL/port or unreachable gateway. Deep pages: diff --git a/docs/web/control-ui.md b/docs/web/control-ui.md index c96a91de0ba..59e9c0c226b 100644 --- a/docs/web/control-ui.md +++ b/docs/web/control-ui.md @@ -174,7 +174,12 @@ OpenClaw **blocks** Control UI connections without device identity. } ``` -`allowInsecureAuth` does not bypass Control UI device identity or pairing checks. +`allowInsecureAuth` is a local compatibility toggle only: + +- It allows localhost Control UI sessions to proceed without device identity in + non-secure HTTP contexts. +- It does not bypass pairing checks. +- It does not relax remote (non-localhost) device identity requirements. **Break-glass only:** diff --git a/docs/web/dashboard.md b/docs/web/dashboard.md index ab5872a6754..86cd6fffd4e 100644 --- a/docs/web/dashboard.md +++ b/docs/web/dashboard.md @@ -45,6 +45,8 @@ Prefer localhost, Tailscale Serve, or an SSH tunnel. ## If you see “unauthorized” / 1008 - Ensure the gateway is reachable (local: `openclaw status`; remote: SSH tunnel `ssh -N -L 18789:127.0.0.1:18789 user@host` then open `http://127.0.0.1:18789/`). 
+- For `AUTH_TOKEN_MISMATCH`, clients may do one trusted retry with a cached device token when the gateway returns retry hints. If auth still fails after that retry, resolve token drift manually. +- For token drift repair steps, follow [Token drift recovery checklist](/cli/devices#token-drift-recovery-checklist). - Retrieve or supply the token from the gateway host: - Plaintext config: `openclaw config get gateway.auth.token` - SecretRef-managed config: resolve the external secret provider or export `OPENCLAW_GATEWAY_TOKEN` in this shell, then rerun `openclaw dashboard` diff --git a/package.json b/package.json index 43fd734092a..695bad9d076 100644 --- a/package.json +++ b/package.json @@ -299,6 +299,7 @@ "start": "node scripts/run-node.mjs", "test": "node scripts/test-parallel.mjs", "test:all": "pnpm lint && pnpm build && pnpm test && pnpm test:e2e && pnpm test:live && pnpm test:docker:all", + "test:auth:compat": "vitest run --config vitest.gateway.config.ts src/gateway/server.auth.compat-baseline.test.ts src/gateway/client.test.ts src/gateway/reconnect-gating.test.ts src/gateway/protocol/connect-error-details.test.ts", "test:channels": "vitest run --config vitest.channels.config.ts", "test:coverage": "vitest run --config vitest.unit.config.ts --coverage", "test:docker:all": "pnpm test:docker:live-models && pnpm test:docker:live-gateway && pnpm test:docker:onboard && pnpm test:docker:gateway-network && pnpm test:docker:qr && pnpm test:docker:doctor-switch && pnpm test:docker:plugins && pnpm test:docker:cleanup", diff --git a/src/gateway/client.test.ts b/src/gateway/client.test.ts index 04ddc5027d4..eb081520a0f 100644 --- a/src/gateway/client.test.ts +++ b/src/gateway/client.test.ts @@ -7,7 +7,6 @@ const wsInstances = vi.hoisted((): MockWebSocket[] => []); const clearDeviceAuthTokenMock = vi.hoisted(() => vi.fn()); const loadDeviceAuthTokenMock = vi.hoisted(() => vi.fn()); const storeDeviceAuthTokenMock = vi.hoisted(() => vi.fn()); -const clearDevicePairingMock = 
vi.hoisted(() => vi.fn()); const logDebugMock = vi.hoisted(() => vi.fn()); type WsEvent = "open" | "message" | "close" | "error"; @@ -52,7 +51,9 @@ class MockWebSocket { } } - close(_code?: number, _reason?: string): void {} + close(code?: number, reason?: string): void { + this.emitClose(code ?? 1000, reason ?? ""); + } send(data: string): void { this.sent.push(data); @@ -91,14 +92,6 @@ vi.mock("../infra/device-auth-store.js", async (importOriginal) => { }; }); -vi.mock("../infra/device-pairing.js", async (importOriginal) => { - const actual = await importOriginal(); - return { - ...actual, - clearDevicePairing: (...args: unknown[]) => clearDevicePairingMock(...args), - }; -}); - vi.mock("../logger.js", async (importOriginal) => { const actual = await importOriginal(); return { @@ -250,8 +243,6 @@ describe("GatewayClient close handling", () => { wsInstances.length = 0; clearDeviceAuthTokenMock.mockClear(); clearDeviceAuthTokenMock.mockImplementation(() => undefined); - clearDevicePairingMock.mockClear(); - clearDevicePairingMock.mockResolvedValue(true); logDebugMock.mockClear(); }); @@ -266,7 +257,7 @@ describe("GatewayClient close handling", () => { ); expect(clearDeviceAuthTokenMock).toHaveBeenCalledWith({ deviceId: "dev-1", role: "operator" }); - expect(clearDevicePairingMock).toHaveBeenCalledWith("dev-1"); + expect(logDebugMock).toHaveBeenCalledWith("cleared stale device-auth token for device dev-1"); expect(onClose).toHaveBeenCalledWith( 1008, "unauthorized: DEVICE token mismatch (rotate/reissue device token)", @@ -289,38 +280,18 @@ describe("GatewayClient close handling", () => { expect(logDebugMock).toHaveBeenCalledWith( expect.stringContaining("failed clearing stale device-auth token"), ); - expect(clearDevicePairingMock).not.toHaveBeenCalled(); - expect(onClose).toHaveBeenCalledWith(1008, "unauthorized: device token mismatch"); - client.stop(); - }); - - it("does not break close flow when pairing clear rejects", async () => { - 
clearDevicePairingMock.mockRejectedValue(new Error("pairing store unavailable")); - const onClose = vi.fn(); - const client = createClientWithIdentity("dev-3", onClose); - - client.start(); - expect(() => { - getLatestWs().emitClose(1008, "unauthorized: device token mismatch"); - }).not.toThrow(); - - await Promise.resolve(); - expect(logDebugMock).toHaveBeenCalledWith( - expect.stringContaining("failed clearing stale device pairing"), - ); expect(onClose).toHaveBeenCalledWith(1008, "unauthorized: device token mismatch"); client.stop(); }); it("does not clear auth state for non-mismatch close reasons", () => { const onClose = vi.fn(); - const client = createClientWithIdentity("dev-4", onClose); + const client = createClientWithIdentity("dev-3", onClose); client.start(); getLatestWs().emitClose(1008, "unauthorized: signature invalid"); expect(clearDeviceAuthTokenMock).not.toHaveBeenCalled(); - expect(clearDevicePairingMock).not.toHaveBeenCalled(); expect(onClose).toHaveBeenCalledWith(1008, "unauthorized: signature invalid"); client.stop(); }); @@ -328,7 +299,7 @@ describe("GatewayClient close handling", () => { it("does not clear persisted device auth when explicit shared token is provided", () => { const onClose = vi.fn(); const identity: DeviceIdentity = { - deviceId: "dev-5", + deviceId: "dev-4", privateKeyPem: "private-key", // pragma: allowlist secret publicKeyPem: "public-key", }; @@ -343,7 +314,6 @@ describe("GatewayClient close handling", () => { getLatestWs().emitClose(1008, "unauthorized: device token mismatch"); expect(clearDeviceAuthTokenMock).not.toHaveBeenCalled(); - expect(clearDevicePairingMock).not.toHaveBeenCalled(); expect(onClose).toHaveBeenCalledWith(1008, "unauthorized: device token mismatch"); client.stop(); }); @@ -458,4 +428,156 @@ describe("GatewayClient connect auth payload", () => { }); client.stop(); }); + + it("retries with stored device token after shared-token mismatch on trusted endpoints", async () => { + 
loadDeviceAuthTokenMock.mockReturnValue({ token: "stored-device-token" }); + const client = new GatewayClient({ + url: "ws://127.0.0.1:18789", + token: "shared-token", + }); + + client.start(); + const ws1 = getLatestWs(); + ws1.emitOpen(); + emitConnectChallenge(ws1); + const firstConnectRaw = ws1.sent.find((frame) => frame.includes('"method":"connect"')); + expect(firstConnectRaw).toBeTruthy(); + const firstConnect = JSON.parse(firstConnectRaw ?? "{}") as { + id?: string; + params?: { auth?: { token?: string; deviceToken?: string } }; + }; + expect(firstConnect.params?.auth?.token).toBe("shared-token"); + expect(firstConnect.params?.auth?.deviceToken).toBeUndefined(); + + ws1.emitMessage( + JSON.stringify({ + type: "res", + id: firstConnect.id, + ok: false, + error: { + code: "INVALID_REQUEST", + message: "unauthorized", + details: { code: "AUTH_TOKEN_MISMATCH", canRetryWithDeviceToken: true }, + }, + }), + ); + + await vi.waitFor(() => expect(wsInstances.length).toBeGreaterThan(1), { timeout: 3_000 }); + const ws2 = getLatestWs(); + ws2.emitOpen(); + emitConnectChallenge(ws2, "nonce-2"); + expect(connectFrameFrom(ws2)).toMatchObject({ + token: "shared-token", + deviceToken: "stored-device-token", + }); + client.stop(); + }); + + it("retries with stored device token when server recommends retry_with_device_token", async () => { + loadDeviceAuthTokenMock.mockReturnValue({ token: "stored-device-token" }); + const client = new GatewayClient({ + url: "ws://127.0.0.1:18789", + token: "shared-token", + }); + + client.start(); + const ws1 = getLatestWs(); + ws1.emitOpen(); + emitConnectChallenge(ws1); + const firstConnectRaw = ws1.sent.find((frame) => frame.includes('"method":"connect"')); + expect(firstConnectRaw).toBeTruthy(); + const firstConnect = JSON.parse(firstConnectRaw ?? 
"{}") as { id?: string }; + + ws1.emitMessage( + JSON.stringify({ + type: "res", + id: firstConnect.id, + ok: false, + error: { + code: "INVALID_REQUEST", + message: "unauthorized", + details: { code: "AUTH_UNAUTHORIZED", recommendedNextStep: "retry_with_device_token" }, + }, + }), + ); + + await vi.waitFor(() => expect(wsInstances.length).toBeGreaterThan(1), { timeout: 3_000 }); + const ws2 = getLatestWs(); + ws2.emitOpen(); + emitConnectChallenge(ws2, "nonce-2"); + expect(connectFrameFrom(ws2)).toMatchObject({ + token: "shared-token", + deviceToken: "stored-device-token", + }); + client.stop(); + }); + + it("does not auto-reconnect on AUTH_TOKEN_MISSING connect failures", async () => { + vi.useFakeTimers(); + const client = new GatewayClient({ + url: "ws://127.0.0.1:18789", + token: "shared-token", + }); + + client.start(); + const ws1 = getLatestWs(); + ws1.emitOpen(); + emitConnectChallenge(ws1); + const firstConnectRaw = ws1.sent.find((frame) => frame.includes('"method":"connect"')); + expect(firstConnectRaw).toBeTruthy(); + const firstConnect = JSON.parse(firstConnectRaw ?? 
"{}") as { id?: string }; + + ws1.emitMessage( + JSON.stringify({ + type: "res", + id: firstConnect.id, + ok: false, + error: { + code: "INVALID_REQUEST", + message: "unauthorized", + details: { code: "AUTH_TOKEN_MISSING" }, + }, + }), + ); + + await vi.advanceTimersByTimeAsync(30_000); + expect(wsInstances).toHaveLength(1); + client.stop(); + vi.useRealTimers(); + }); + + it("does not auto-reconnect on token mismatch when retry is not trusted", async () => { + vi.useFakeTimers(); + loadDeviceAuthTokenMock.mockReturnValue({ token: "stored-device-token" }); + const client = new GatewayClient({ + url: "wss://gateway.example.com:18789", + token: "shared-token", + }); + + client.start(); + const ws1 = getLatestWs(); + ws1.emitOpen(); + emitConnectChallenge(ws1); + const firstConnectRaw = ws1.sent.find((frame) => frame.includes('"method":"connect"')); + expect(firstConnectRaw).toBeTruthy(); + const firstConnect = JSON.parse(firstConnectRaw ?? "{}") as { id?: string }; + + ws1.emitMessage( + JSON.stringify({ + type: "res", + id: firstConnect.id, + ok: false, + error: { + code: "INVALID_REQUEST", + message: "unauthorized", + details: { code: "AUTH_TOKEN_MISMATCH", canRetryWithDeviceToken: true }, + }, + }), + ); + + await vi.advanceTimersByTimeAsync(30_000); + expect(wsInstances).toHaveLength(1); + client.stop(); + vi.useRealTimers(); + }); }); diff --git a/src/gateway/client.ts b/src/gateway/client.ts index 4641545ea8e..489347e54f9 100644 --- a/src/gateway/client.ts +++ b/src/gateway/client.ts @@ -11,7 +11,6 @@ import { publicKeyRawBase64UrlFromPem, signDevicePayload, } from "../infra/device-identity.js"; -import { clearDevicePairing } from "../infra/device-pairing.js"; import { normalizeFingerprint } from "../infra/tls/fingerprint.js"; import { rawDataToString } from "../infra/ws.js"; import { logDebug, logError } from "../logger.js"; @@ -23,7 +22,13 @@ import { } from "../utils/message-channel.js"; import { VERSION } from "../version.js"; import { 
buildDeviceAuthPayloadV3 } from "./device-auth.js"; -import { isSecureWebSocketUrl } from "./net.js"; +import { isLoopbackHost, isSecureWebSocketUrl } from "./net.js"; +import { + ConnectErrorDetailCodes, + readConnectErrorDetailCode, + readConnectErrorRecoveryAdvice, + type ConnectErrorRecoveryAdvice, +} from "./protocol/connect-error-details.js"; import { type ConnectParams, type EventFrame, @@ -41,6 +46,24 @@ type Pending = { expectFinal: boolean; }; +type GatewayClientErrorShape = { + code?: string; + message?: string; + details?: unknown; +}; + +class GatewayClientRequestError extends Error { + readonly gatewayCode: string; + readonly details?: unknown; + + constructor(error: GatewayClientErrorShape) { + super(error.message ?? "gateway request failed"); + this.name = "GatewayClientRequestError"; + this.gatewayCode = error.code ?? "UNAVAILABLE"; + this.details = error.details; + } +} + export type GatewayClientOptions = { url?: string; // ws://127.0.0.1:18789 connectDelayMs?: number; @@ -93,6 +116,9 @@ export class GatewayClient { private connectNonce: string | null = null; private connectSent = false; private connectTimer: NodeJS.Timeout | null = null; + private pendingDeviceTokenRetry = false; + private deviceTokenRetryBudgetUsed = false; + private pendingConnectErrorDetailCode: string | null = null; // Track last tick to detect silent stalls. private lastTick: number | null = null; private tickIntervalMs = 30_000; @@ -184,6 +210,8 @@ export class GatewayClient { this.ws.on("message", (data) => this.handleMessage(rawDataToString(data))); this.ws.on("close", (code, reason) => { const reasonText = rawDataToString(reason); + const connectErrorDetailCode = this.pendingConnectErrorDetailCode; + this.pendingConnectErrorDetailCode = null; this.ws = null; // Clear persisted device auth state only when device-token auth was active. 
// Shared token/password failures can return the same close reason but should @@ -199,9 +227,6 @@ export class GatewayClient { const role = this.opts.role ?? "operator"; try { clearDeviceAuthToken({ deviceId, role }); - void clearDevicePairing(deviceId).catch((err) => { - logDebug(`failed clearing stale device pairing for device ${deviceId}: ${String(err)}`); - }); logDebug(`cleared stale device-auth token for device ${deviceId}`); } catch (err) { logDebug( @@ -210,6 +235,10 @@ export class GatewayClient { } } this.flushPendingErrors(new Error(`gateway closed (${code}): ${reasonText}`)); + if (this.shouldPauseReconnectAfterAuthFailure(connectErrorDetailCode)) { + this.opts.onClose?.(code, reasonText); + return; + } this.scheduleReconnect(); this.opts.onClose?.(code, reasonText); }); @@ -223,6 +252,9 @@ export class GatewayClient { stop() { this.closed = true; + this.pendingDeviceTokenRetry = false; + this.deviceTokenRetryBudgetUsed = false; + this.pendingConnectErrorDetailCode = null; if (this.tickTimer) { clearInterval(this.tickTimer); this.tickTimer = null; @@ -253,11 +285,20 @@ export class GatewayClient { const storedToken = this.opts.deviceIdentity ? loadDeviceAuthToken({ deviceId: this.opts.deviceIdentity.deviceId, role })?.token : null; + const shouldUseDeviceRetryToken = + this.pendingDeviceTokenRetry && + !explicitDeviceToken && + Boolean(explicitGatewayToken) && + Boolean(storedToken) && + this.isTrustedDeviceRetryEndpoint(); + if (shouldUseDeviceRetryToken) { + this.pendingDeviceTokenRetry = false; + } // Keep shared gateway credentials explicit. Persisted per-device tokens only // participate when no explicit shared token/password is provided. const resolvedDeviceToken = explicitDeviceToken ?? - (!(explicitGatewayToken || this.opts.password?.trim()) + (shouldUseDeviceRetryToken || !(explicitGatewayToken || this.opts.password?.trim()) ? (storedToken ?? 
undefined) : undefined); // Legacy compatibility: keep `auth.token` populated for device-token auth when @@ -327,6 +368,9 @@ export class GatewayClient { void this.request("connect", params) .then((helloOk) => { + this.pendingDeviceTokenRetry = false; + this.deviceTokenRetryBudgetUsed = false; + this.pendingConnectErrorDetailCode = null; const authInfo = helloOk?.auth; if (authInfo?.deviceToken && this.opts.deviceIdentity) { storeDeviceAuthToken({ @@ -346,6 +390,19 @@ export class GatewayClient { this.opts.onHelloOk?.(helloOk); }) .catch((err) => { + this.pendingConnectErrorDetailCode = + err instanceof GatewayClientRequestError ? readConnectErrorDetailCode(err.details) : null; + const shouldRetryWithDeviceToken = this.shouldRetryWithStoredDeviceToken({ + error: err, + explicitGatewayToken, + resolvedDeviceToken, + storedToken: storedToken ?? undefined, + }); + if (shouldRetryWithDeviceToken) { + this.pendingDeviceTokenRetry = true; + this.deviceTokenRetryBudgetUsed = true; + this.backoffMs = Math.min(this.backoffMs, 250); + } this.opts.onConnectError?.(err instanceof Error ? 
err : new Error(String(err))); const msg = `gateway connect failed: ${String(err)}`; if (this.opts.mode === GATEWAY_CLIENT_MODES.PROBE) { @@ -357,6 +414,86 @@ export class GatewayClient { }); } + private shouldPauseReconnectAfterAuthFailure(detailCode: string | null): boolean { + if (!detailCode) { + return false; + } + if ( + detailCode === ConnectErrorDetailCodes.AUTH_TOKEN_MISSING || + detailCode === ConnectErrorDetailCodes.AUTH_PASSWORD_MISSING || + detailCode === ConnectErrorDetailCodes.AUTH_PASSWORD_MISMATCH || + detailCode === ConnectErrorDetailCodes.AUTH_RATE_LIMITED || + detailCode === ConnectErrorDetailCodes.PAIRING_REQUIRED || + detailCode === ConnectErrorDetailCodes.CONTROL_UI_DEVICE_IDENTITY_REQUIRED || + detailCode === ConnectErrorDetailCodes.DEVICE_IDENTITY_REQUIRED + ) { + return true; + } + if (detailCode !== ConnectErrorDetailCodes.AUTH_TOKEN_MISMATCH) { + return false; + } + if (this.pendingDeviceTokenRetry) { + return false; + } + // If the endpoint is not trusted for retry, mismatch is terminal until operator action. + if (!this.isTrustedDeviceRetryEndpoint()) { + return true; + } + // Pause mismatch reconnect loops once the one-shot device-token retry is consumed. 
+ return this.deviceTokenRetryBudgetUsed; + } + + private shouldRetryWithStoredDeviceToken(params: { + error: unknown; + explicitGatewayToken?: string; + storedToken?: string; + resolvedDeviceToken?: string; + }): boolean { + if (this.deviceTokenRetryBudgetUsed) { + return false; + } + if (params.resolvedDeviceToken) { + return false; + } + if (!params.explicitGatewayToken || !params.storedToken) { + return false; + } + if (!this.isTrustedDeviceRetryEndpoint()) { + return false; + } + if (!(params.error instanceof GatewayClientRequestError)) { + return false; + } + const detailCode = readConnectErrorDetailCode(params.error.details); + const advice: ConnectErrorRecoveryAdvice = readConnectErrorRecoveryAdvice(params.error.details); + const retryWithDeviceTokenRecommended = + advice.recommendedNextStep === "retry_with_device_token"; + return ( + advice.canRetryWithDeviceToken === true || + retryWithDeviceTokenRecommended || + detailCode === ConnectErrorDetailCodes.AUTH_TOKEN_MISMATCH + ); + } + + private isTrustedDeviceRetryEndpoint(): boolean { + const rawUrl = this.opts.url ?? "ws://127.0.0.1:18789"; + try { + const parsed = new URL(rawUrl); + const protocol = + parsed.protocol === "https:" + ? "wss:" + : parsed.protocol === "http:" + ? "ws:" + : parsed.protocol; + if (isLoopbackHost(parsed.hostname)) { + return true; + } + return protocol === "wss:" && Boolean(this.opts.tlsFingerprint?.trim()); + } catch { + return false; + } + } + private handleMessage(raw: string) { try { const parsed = JSON.parse(raw); @@ -402,7 +539,13 @@ export class GatewayClient { if (parsed.ok) { pending.resolve(parsed.payload); } else { - pending.reject(new Error(parsed.error?.message ?? "unknown error")); + pending.reject( + new GatewayClientRequestError({ + code: parsed.error?.code, + message: parsed.error?.message ?? 
"unknown error", + details: parsed.error?.details, + }), + ); } } } catch (err) { diff --git a/src/gateway/protocol/connect-error-details.test.ts b/src/gateway/protocol/connect-error-details.test.ts new file mode 100644 index 00000000000..2a7a2c53979 --- /dev/null +++ b/src/gateway/protocol/connect-error-details.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from "vitest"; +import { + readConnectErrorDetailCode, + readConnectErrorRecoveryAdvice, +} from "./connect-error-details.js"; + +describe("readConnectErrorDetailCode", () => { + it("reads structured detail codes", () => { + expect(readConnectErrorDetailCode({ code: "AUTH_TOKEN_MISMATCH" })).toBe("AUTH_TOKEN_MISMATCH"); + }); + + it("returns null for invalid detail payloads", () => { + expect(readConnectErrorDetailCode(null)).toBeNull(); + expect(readConnectErrorDetailCode("AUTH_TOKEN_MISMATCH")).toBeNull(); + }); +}); + +describe("readConnectErrorRecoveryAdvice", () => { + it("reads retry advice fields when present", () => { + expect( + readConnectErrorRecoveryAdvice({ + canRetryWithDeviceToken: true, + recommendedNextStep: "retry_with_device_token", + }), + ).toEqual({ + canRetryWithDeviceToken: true, + recommendedNextStep: "retry_with_device_token", + }); + }); + + it("returns empty advice for invalid payloads", () => { + expect(readConnectErrorRecoveryAdvice(null)).toEqual({}); + expect(readConnectErrorRecoveryAdvice("x")).toEqual({}); + expect(readConnectErrorRecoveryAdvice({ canRetryWithDeviceToken: "yes" })).toEqual({}); + expect( + readConnectErrorRecoveryAdvice({ + canRetryWithDeviceToken: true, + recommendedNextStep: "retry_with_magic", + }), + ).toEqual({ canRetryWithDeviceToken: true, recommendedNextStep: undefined }); + }); +}); diff --git a/src/gateway/protocol/connect-error-details.ts b/src/gateway/protocol/connect-error-details.ts index 442e8f2c54d..298241c623f 100644 --- a/src/gateway/protocol/connect-error-details.ts +++ b/src/gateway/protocol/connect-error-details.ts @@ -28,6 
+28,26 @@ export const ConnectErrorDetailCodes = { export type ConnectErrorDetailCode = (typeof ConnectErrorDetailCodes)[keyof typeof ConnectErrorDetailCodes]; +export type ConnectRecoveryNextStep = + | "retry_with_device_token" + | "update_auth_configuration" + | "update_auth_credentials" + | "wait_then_retry" + | "review_auth_configuration"; + +export type ConnectErrorRecoveryAdvice = { + canRetryWithDeviceToken?: boolean; + recommendedNextStep?: ConnectRecoveryNextStep; +}; + +const CONNECT_RECOVERY_NEXT_STEP_VALUES: ReadonlySet<ConnectRecoveryNextStep> = new Set<ConnectRecoveryNextStep>([ + "retry_with_device_token", + "update_auth_configuration", + "update_auth_credentials", + "wait_then_retry", + "review_auth_configuration", +]); + export function resolveAuthConnectErrorDetailCode( reason: string | undefined, ): ConnectErrorDetailCode { @@ -91,3 +111,26 @@ export function readConnectErrorDetailCode(details: unknown): string | null { const code = (details as { code?: unknown }).code; return typeof code === "string" && code.trim().length > 0 ? code : null; } + +export function readConnectErrorRecoveryAdvice(details: unknown): ConnectErrorRecoveryAdvice { + if (!details || typeof details !== "object" || Array.isArray(details)) { + return {}; + } + const raw = details as { + canRetryWithDeviceToken?: unknown; + recommendedNextStep?: unknown; + }; + const canRetryWithDeviceToken = + typeof raw.canRetryWithDeviceToken === "boolean" ? raw.canRetryWithDeviceToken : undefined; + const normalizedNextStep = + typeof raw.recommendedNextStep === "string" ? raw.recommendedNextStep.trim() : ""; + const recommendedNextStep = CONNECT_RECOVERY_NEXT_STEP_VALUES.has( + normalizedNextStep as ConnectRecoveryNextStep, + ) + ? 
(normalizedNextStep as ConnectRecoveryNextStep) + : undefined; + return { + canRetryWithDeviceToken, + recommendedNextStep, + }; +} diff --git a/src/gateway/reconnect-gating.test.ts b/src/gateway/reconnect-gating.test.ts index 3ea02e21820..d073cc59c3f 100644 --- a/src/gateway/reconnect-gating.test.ts +++ b/src/gateway/reconnect-gating.test.ts @@ -39,9 +39,15 @@ describe("isNonRecoverableAuthError", () => { ); }); + it("blocks reconnect for PAIRING_REQUIRED", () => { + expect(isNonRecoverableAuthError(makeError(ConnectErrorDetailCodes.PAIRING_REQUIRED))).toBe( + true, + ); + }); + it("allows reconnect for AUTH_TOKEN_MISMATCH (device-token fallback flow)", () => { - // Browser client fallback: stale device token → mismatch → sendConnect() clears it → - // next reconnect uses opts.token (shared gateway token). Blocking here breaks recovery. + // Browser client can queue a single trusted-device retry after shared token mismatch. + // Blocking reconnect on mismatch here would skip that bounded recovery attempt. 
expect(isNonRecoverableAuthError(makeError(ConnectErrorDetailCodes.AUTH_TOKEN_MISMATCH))).toBe( false, ); diff --git a/src/gateway/server.auth.compat-baseline.test.ts b/src/gateway/server.auth.compat-baseline.test.ts new file mode 100644 index 00000000000..d63b62b8b88 --- /dev/null +++ b/src/gateway/server.auth.compat-baseline.test.ts @@ -0,0 +1,196 @@ +import { afterAll, beforeAll, describe, expect, test } from "vitest"; +import { + connectReq, + CONTROL_UI_CLIENT, + ConnectErrorDetailCodes, + getFreePort, + openWs, + originForPort, + restoreGatewayToken, + startGatewayServer, + testState, +} from "./server.auth.shared.js"; + +function expectAuthErrorDetails(params: { + details: unknown; + expectedCode: string; + canRetryWithDeviceToken?: boolean; + recommendedNextStep?: string; +}) { + const details = params.details as + | { + code?: string; + canRetryWithDeviceToken?: boolean; + recommendedNextStep?: string; + } + | undefined; + expect(details?.code).toBe(params.expectedCode); + if (params.canRetryWithDeviceToken !== undefined) { + expect(details?.canRetryWithDeviceToken).toBe(params.canRetryWithDeviceToken); + } + if (params.recommendedNextStep !== undefined) { + expect(details?.recommendedNextStep).toBe(params.recommendedNextStep); + } +} + +describe("gateway auth compatibility baseline", () => { + describe("token mode", () => { + let server: Awaited>; + let port = 0; + let prevToken: string | undefined; + + beforeAll(async () => { + prevToken = process.env.OPENCLAW_GATEWAY_TOKEN; + testState.gatewayAuth = { mode: "token", token: "secret" }; + process.env.OPENCLAW_GATEWAY_TOKEN = "secret"; + port = await getFreePort(); + server = await startGatewayServer(port); + }); + + afterAll(async () => { + await server.close(); + restoreGatewayToken(prevToken); + }); + + test("keeps valid shared-token connect behavior unchanged", async () => { + const ws = await openWs(port); + try { + const res = await connectReq(ws, { token: "secret" }); + expect(res.ok).toBe(true); + 
} finally { + ws.close(); + } + }); + + test("returns stable token-missing details for control ui without token", async () => { + const ws = await openWs(port, { origin: originForPort(port) }); + try { + const res = await connectReq(ws, { + skipDefaultAuth: true, + client: { ...CONTROL_UI_CLIENT }, + }); + expect(res.ok).toBe(false); + expect(res.error?.message ?? "").toContain("Control UI settings"); + expectAuthErrorDetails({ + details: res.error?.details, + expectedCode: ConnectErrorDetailCodes.AUTH_TOKEN_MISSING, + canRetryWithDeviceToken: false, + recommendedNextStep: "update_auth_configuration", + }); + } finally { + ws.close(); + } + }); + + test("provides one-time retry hint for shared token mismatches", async () => { + const ws = await openWs(port); + try { + const res = await connectReq(ws, { token: "wrong" }); + expect(res.ok).toBe(false); + expect(res.error?.message ?? "").toContain("gateway token mismatch"); + expectAuthErrorDetails({ + details: res.error?.details, + expectedCode: ConnectErrorDetailCodes.AUTH_TOKEN_MISMATCH, + canRetryWithDeviceToken: true, + recommendedNextStep: "retry_with_device_token", + }); + } finally { + ws.close(); + } + }); + + test("keeps explicit device token mismatch semantics stable", async () => { + const ws = await openWs(port); + try { + const res = await connectReq(ws, { + skipDefaultAuth: true, + deviceToken: "not-a-valid-device-token", + }); + expect(res.ok).toBe(false); + expect(res.error?.message ?? 
"").toContain("device token mismatch"); + expectAuthErrorDetails({ + details: res.error?.details, + expectedCode: ConnectErrorDetailCodes.AUTH_DEVICE_TOKEN_MISMATCH, + canRetryWithDeviceToken: false, + recommendedNextStep: "update_auth_credentials", + }); + } finally { + ws.close(); + } + }); + }); + + describe("password mode", () => { + let server: Awaited>; + let port = 0; + let prevToken: string | undefined; + + beforeAll(async () => { + prevToken = process.env.OPENCLAW_GATEWAY_TOKEN; + testState.gatewayAuth = { mode: "password", password: "secret" }; + delete process.env.OPENCLAW_GATEWAY_TOKEN; + port = await getFreePort(); + server = await startGatewayServer(port); + }); + + afterAll(async () => { + await server.close(); + restoreGatewayToken(prevToken); + }); + + test("keeps valid shared-password connect behavior unchanged", async () => { + const ws = await openWs(port); + try { + const res = await connectReq(ws, { password: "secret" }); + expect(res.ok).toBe(true); + } finally { + ws.close(); + } + }); + + test("returns stable password mismatch details", async () => { + const ws = await openWs(port); + try { + const res = await connectReq(ws, { password: "wrong" }); + expect(res.ok).toBe(false); + expectAuthErrorDetails({ + details: res.error?.details, + expectedCode: ConnectErrorDetailCodes.AUTH_PASSWORD_MISMATCH, + canRetryWithDeviceToken: false, + recommendedNextStep: "update_auth_credentials", + }); + } finally { + ws.close(); + } + }); + }); + + describe("none mode", () => { + let server: Awaited>; + let port = 0; + let prevToken: string | undefined; + + beforeAll(async () => { + prevToken = process.env.OPENCLAW_GATEWAY_TOKEN; + testState.gatewayAuth = { mode: "none" }; + delete process.env.OPENCLAW_GATEWAY_TOKEN; + port = await getFreePort(); + server = await startGatewayServer(port); + }); + + afterAll(async () => { + await server.close(); + restoreGatewayToken(prevToken); + }); + + test("keeps auth-none loopback behavior unchanged", async () => { + 
const ws = await openWs(port); + try { + const res = await connectReq(ws, { skipDefaultAuth: true }); + expect(res.ok).toBe(true); + } finally { + ws.close(); + } + }); + }); +}); diff --git a/src/gateway/server.auth.control-ui.suite.ts b/src/gateway/server.auth.control-ui.suite.ts index 3817cead335..12698faf3bf 100644 --- a/src/gateway/server.auth.control-ui.suite.ts +++ b/src/gateway/server.auth.control-ui.suite.ts @@ -391,9 +391,16 @@ export function registerControlUiAndPairingSuite(): void { expect(res.ok).toBe(false); expect(res.error?.message ?? "").toContain("gateway token mismatch"); expect(res.error?.message ?? "").not.toContain("device token mismatch"); - expect((res.error?.details as { code?: string } | undefined)?.code).toBe( - ConnectErrorDetailCodes.AUTH_TOKEN_MISMATCH, - ); + const details = res.error?.details as + | { + code?: string; + canRetryWithDeviceToken?: boolean; + recommendedNextStep?: string; + } + | undefined; + expect(details?.code).toBe(ConnectErrorDetailCodes.AUTH_TOKEN_MISMATCH); + expect(details?.canRetryWithDeviceToken).toBe(true); + expect(details?.recommendedNextStep).toBe("retry_with_device_token"); }, }, { diff --git a/src/gateway/server/ws-connection/message-handler.ts b/src/gateway/server/ws-connection/message-handler.ts index f1568796192..83d1b5f12a3 100644 --- a/src/gateway/server/ws-connection/message-handler.ts +++ b/src/gateway/server/ws-connection/message-handler.ts @@ -562,6 +562,31 @@ export function attachGatewayWsMessageHandler(params: { clientIp: browserRateLimitClientIp, }); const rejectUnauthorized = (failedAuth: GatewayAuthResult) => { + const canRetryWithDeviceToken = + failedAuth.reason === "token_mismatch" && + Boolean(device) && + hasSharedAuth && + !connectParams.auth?.deviceToken; + const recommendedNextStep = (() => { + if (canRetryWithDeviceToken) { + return "retry_with_device_token"; + } + switch (failedAuth.reason) { + case "token_missing": + case "token_missing_config": + case "password_missing": + 
case "password_missing_config": + return "update_auth_configuration"; + case "token_mismatch": + case "password_mismatch": + case "device_token_mismatch": + return "update_auth_credentials"; + case "rate_limited": + return "wait_then_retry"; + default: + return "review_auth_configuration"; + } + })(); markHandshakeFailure("unauthorized", { authMode: resolvedAuth.mode, authProvided: connectParams.auth?.password @@ -594,6 +619,8 @@ export function attachGatewayWsMessageHandler(params: { details: { code: resolveAuthConnectErrorDetailCode(failedAuth.reason), authReason: failedAuth.reason, + canRetryWithDeviceToken, + recommendedNextStep, }, }); close(1008, truncateCloseReason(authMessage)); diff --git a/ui/src/ui/gateway.node.test.ts b/ui/src/ui/gateway.node.test.ts index 07c63a7117b..c77f3a3684c 100644 --- a/ui/src/ui/gateway.node.test.ts +++ b/ui/src/ui/gateway.node.test.ts @@ -1,5 +1,5 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { storeDeviceAuthToken } from "./device-auth.ts"; +import { loadDeviceAuthToken, storeDeviceAuthToken } from "./device-auth.ts"; import type { DeviceIdentity } from "./device-identity.ts"; const wsInstances = vi.hoisted((): MockWebSocket[] => []); @@ -54,6 +54,12 @@ class MockWebSocket { this.readyState = 3; } + emitClose(code = 1000, reason = "") { + for (const handler of this.handlers.close) { + handler({ code, reason }); + } + } + emitOpen() { for (const handler of this.handlers.open) { handler(); @@ -106,6 +112,7 @@ describe("GatewayBrowserClient", () => { }); afterEach(() => { + vi.useRealTimers(); vi.unstubAllGlobals(); }); @@ -166,4 +173,212 @@ describe("GatewayBrowserClient", () => { const signedPayload = signDevicePayloadMock.mock.calls[0]?.[1]; expect(signedPayload).toContain("|stored-device-token|nonce-1"); }); + + it("retries once with device token after token mismatch when shared token is explicit", async () => { + vi.useFakeTimers(); + const client = new GatewayBrowserClient({ + url: 
"ws://127.0.0.1:18789", + token: "shared-auth-token", + }); + + client.start(); + const ws1 = getLatestWebSocket(); + ws1.emitOpen(); + ws1.emitMessage({ + type: "event", + event: "connect.challenge", + payload: { nonce: "nonce-1" }, + }); + await vi.waitFor(() => expect(ws1.sent.length).toBeGreaterThan(0)); + const firstConnect = JSON.parse(ws1.sent.at(-1) ?? "{}") as { + id: string; + params?: { auth?: { token?: string; deviceToken?: string } }; + }; + expect(firstConnect.params?.auth?.token).toBe("shared-auth-token"); + expect(firstConnect.params?.auth?.deviceToken).toBeUndefined(); + + ws1.emitMessage({ + type: "res", + id: firstConnect.id, + ok: false, + error: { + code: "INVALID_REQUEST", + message: "unauthorized", + details: { code: "AUTH_TOKEN_MISMATCH", canRetryWithDeviceToken: true }, + }, + }); + await vi.waitFor(() => expect(ws1.readyState).toBe(3)); + ws1.emitClose(4008, "connect failed"); + + await vi.advanceTimersByTimeAsync(800); + const ws2 = getLatestWebSocket(); + expect(ws2).not.toBe(ws1); + ws2.emitOpen(); + ws2.emitMessage({ + type: "event", + event: "connect.challenge", + payload: { nonce: "nonce-2" }, + }); + await vi.waitFor(() => expect(ws2.sent.length).toBeGreaterThan(0)); + const secondConnect = JSON.parse(ws2.sent.at(-1) ?? 
"{}") as { + id: string; + params?: { auth?: { token?: string; deviceToken?: string } }; + }; + expect(secondConnect.params?.auth?.token).toBe("shared-auth-token"); + expect(secondConnect.params?.auth?.deviceToken).toBe("stored-device-token"); + + ws2.emitMessage({ + type: "res", + id: secondConnect.id, + ok: false, + error: { + code: "INVALID_REQUEST", + message: "unauthorized", + details: { code: "AUTH_TOKEN_MISMATCH" }, + }, + }); + await vi.waitFor(() => expect(ws2.readyState).toBe(3)); + ws2.emitClose(4008, "connect failed"); + expect(loadDeviceAuthToken({ deviceId: "device-1", role: "operator" })?.token).toBe( + "stored-device-token", + ); + await vi.advanceTimersByTimeAsync(30_000); + expect(wsInstances).toHaveLength(2); + + vi.useRealTimers(); + }); + + it("treats IPv6 loopback as trusted for bounded device-token retry", async () => { + vi.useFakeTimers(); + const client = new GatewayBrowserClient({ + url: "ws://[::1]:18789", + token: "shared-auth-token", + }); + + client.start(); + const ws1 = getLatestWebSocket(); + ws1.emitOpen(); + ws1.emitMessage({ + type: "event", + event: "connect.challenge", + payload: { nonce: "nonce-1" }, + }); + await vi.waitFor(() => expect(ws1.sent.length).toBeGreaterThan(0)); + const firstConnect = JSON.parse(ws1.sent.at(-1) ?? 
"{}") as { + id: string; + params?: { auth?: { token?: string; deviceToken?: string } }; + }; + expect(firstConnect.params?.auth?.token).toBe("shared-auth-token"); + expect(firstConnect.params?.auth?.deviceToken).toBeUndefined(); + + ws1.emitMessage({ + type: "res", + id: firstConnect.id, + ok: false, + error: { + code: "INVALID_REQUEST", + message: "unauthorized", + details: { code: "AUTH_TOKEN_MISMATCH", canRetryWithDeviceToken: true }, + }, + }); + await vi.waitFor(() => expect(ws1.readyState).toBe(3)); + ws1.emitClose(4008, "connect failed"); + + await vi.advanceTimersByTimeAsync(800); + const ws2 = getLatestWebSocket(); + expect(ws2).not.toBe(ws1); + ws2.emitOpen(); + ws2.emitMessage({ + type: "event", + event: "connect.challenge", + payload: { nonce: "nonce-2" }, + }); + await vi.waitFor(() => expect(ws2.sent.length).toBeGreaterThan(0)); + const secondConnect = JSON.parse(ws2.sent.at(-1) ?? "{}") as { + params?: { auth?: { token?: string; deviceToken?: string } }; + }; + expect(secondConnect.params?.auth?.token).toBe("shared-auth-token"); + expect(secondConnect.params?.auth?.deviceToken).toBe("stored-device-token"); + + client.stop(); + vi.useRealTimers(); + }); + + it("continues reconnecting on first token mismatch when no retry was attempted", async () => { + vi.useFakeTimers(); + window.localStorage.clear(); + + const client = new GatewayBrowserClient({ + url: "ws://127.0.0.1:18789", + token: "shared-auth-token", + }); + + client.start(); + const ws1 = getLatestWebSocket(); + ws1.emitOpen(); + ws1.emitMessage({ + type: "event", + event: "connect.challenge", + payload: { nonce: "nonce-1" }, + }); + await vi.waitFor(() => expect(ws1.sent.length).toBeGreaterThan(0)); + const firstConnect = JSON.parse(ws1.sent.at(-1) ?? 
"{}") as { id: string }; + + ws1.emitMessage({ + type: "res", + id: firstConnect.id, + ok: false, + error: { + code: "INVALID_REQUEST", + message: "unauthorized", + details: { code: "AUTH_TOKEN_MISMATCH" }, + }, + }); + await vi.waitFor(() => expect(ws1.readyState).toBe(3)); + ws1.emitClose(4008, "connect failed"); + + await vi.advanceTimersByTimeAsync(800); + expect(wsInstances).toHaveLength(2); + + client.stop(); + vi.useRealTimers(); + }); + + it("does not auto-reconnect on AUTH_TOKEN_MISSING", async () => { + vi.useFakeTimers(); + window.localStorage.clear(); + + const client = new GatewayBrowserClient({ + url: "ws://127.0.0.1:18789", + }); + + client.start(); + const ws1 = getLatestWebSocket(); + ws1.emitOpen(); + ws1.emitMessage({ + type: "event", + event: "connect.challenge", + payload: { nonce: "nonce-1" }, + }); + await vi.waitFor(() => expect(ws1.sent.length).toBeGreaterThan(0)); + const connect = JSON.parse(ws1.sent.at(-1) ?? "{}") as { id: string }; + + ws1.emitMessage({ + type: "res", + id: connect.id, + ok: false, + error: { + code: "INVALID_REQUEST", + message: "unauthorized", + details: { code: "AUTH_TOKEN_MISSING" }, + }, + }); + await vi.waitFor(() => expect(ws1.readyState).toBe(3)); + ws1.emitClose(4008, "connect failed"); + + await vi.advanceTimersByTimeAsync(30_000); + expect(wsInstances).toHaveLength(1); + + vi.useRealTimers(); + }); }); diff --git a/ui/src/ui/gateway.ts b/ui/src/ui/gateway.ts index c5d4bad86a3..c0d9ef71271 100644 --- a/ui/src/ui/gateway.ts +++ b/ui/src/ui/gateway.ts @@ -7,6 +7,7 @@ import { } from "../../../src/gateway/protocol/client-info.js"; import { ConnectErrorDetailCodes, + readConnectErrorRecoveryAdvice, readConnectErrorDetailCode, } from "../../../src/gateway/protocol/connect-error-details.js"; import { clearDeviceAuthToken, loadDeviceAuthToken, storeDeviceAuthToken } from "./device-auth.ts"; @@ -57,11 +58,9 @@ export function resolveGatewayErrorDetailCode( * Auth errors that won't resolve without user action — don't 
auto-reconnect. * * NOTE: AUTH_TOKEN_MISMATCH is intentionally NOT included here because the - * browser client has a device-token fallback flow: a stale cached device token - * triggers a mismatch, sendConnect() clears it, and the next reconnect retries - * with opts.token (the shared gateway token). Blocking reconnect on mismatch - * would break that fallback. The rate limiter still catches persistent wrong - * tokens after N failures → AUTH_RATE_LIMITED stops the loop. + * browser client supports a bounded one-time retry with a cached device token + * when the endpoint is trusted. Reconnect suppression for mismatch is handled + * with client state (after retry budget is exhausted). */ export function isNonRecoverableAuthError(error: GatewayErrorInfo | undefined): boolean { if (!error) { @@ -72,10 +71,30 @@ export function isNonRecoverableAuthError(error: GatewayErrorInfo | undefined): code === ConnectErrorDetailCodes.AUTH_TOKEN_MISSING || code === ConnectErrorDetailCodes.AUTH_PASSWORD_MISSING || code === ConnectErrorDetailCodes.AUTH_PASSWORD_MISMATCH || - code === ConnectErrorDetailCodes.AUTH_RATE_LIMITED + code === ConnectErrorDetailCodes.AUTH_RATE_LIMITED || + code === ConnectErrorDetailCodes.PAIRING_REQUIRED || + code === ConnectErrorDetailCodes.CONTROL_UI_DEVICE_IDENTITY_REQUIRED || + code === ConnectErrorDetailCodes.DEVICE_IDENTITY_REQUIRED ); } +function isTrustedRetryEndpoint(url: string): boolean { + try { + const gatewayUrl = new URL(url, window.location.href); + const host = gatewayUrl.hostname.trim().toLowerCase(); + const isLoopbackHost = + host === "localhost" || host === "::1" || host === "[::1]" || host === "127.0.0.1"; + const isLoopbackIPv4 = /^127(?:\.\d{1,3}){3}$/.test(host); // dotted-quad 127/8 only; a DNS name such as "127.example.com" must not count as loopback + if (isLoopbackHost || isLoopbackIPv4) { + return true; + } + const pageUrl = new URL(window.location.href); + return gatewayUrl.host === pageUrl.host; + } catch { + return false; + } +} + export type GatewayHelloOk = { type: "hello-ok"; protocol: number; @@ -127,6 +146,8 @@ export 
class GatewayBrowserClient { private connectTimer: number | null = null; private backoffMs = 800; private pendingConnectError: GatewayErrorInfo | undefined; + private pendingDeviceTokenRetry = false; + private deviceTokenRetryBudgetUsed = false; constructor(private opts: GatewayBrowserClientOptions) {} @@ -140,6 +161,8 @@ export class GatewayBrowserClient { this.ws?.close(); this.ws = null; this.pendingConnectError = undefined; + this.pendingDeviceTokenRetry = false; + this.deviceTokenRetryBudgetUsed = false; this.flushPending(new Error("gateway client stopped")); } @@ -161,6 +184,14 @@ export class GatewayBrowserClient { this.ws = null; this.flushPending(new Error(`gateway closed (${ev.code}): ${reason}`)); this.opts.onClose?.({ code: ev.code, reason, error: connectError }); + const connectErrorCode = resolveGatewayErrorDetailCode(connectError); + if ( + connectErrorCode === ConnectErrorDetailCodes.AUTH_TOKEN_MISMATCH && + this.deviceTokenRetryBudgetUsed && + !this.pendingDeviceTokenRetry + ) { + return; + } if (!isNonRecoverableAuthError(connectError)) { this.scheduleReconnect(); } @@ -215,9 +246,20 @@ export class GatewayBrowserClient { deviceId: deviceIdentity.deviceId, role, })?.token; - deviceToken = !(explicitGatewayToken || this.opts.password?.trim()) - ? (storedToken ?? undefined) - : undefined; + const shouldUseDeviceRetryToken = + this.pendingDeviceTokenRetry && + !deviceToken && + Boolean(explicitGatewayToken) && + Boolean(storedToken) && + isTrustedRetryEndpoint(this.opts.url); + if (shouldUseDeviceRetryToken) { + deviceToken = storedToken ?? undefined; + this.pendingDeviceTokenRetry = false; + } else { + deviceToken = !(explicitGatewayToken || this.opts.password?.trim()) + ? (storedToken ?? undefined) + : undefined; + } canFallbackToShared = Boolean(deviceToken && explicitGatewayToken); } authToken = explicitGatewayToken ?? deviceToken; @@ -225,6 +267,7 @@ export class GatewayBrowserClient { authToken || this.opts.password ? 
{ token: authToken, + deviceToken, password: this.opts.password, } : undefined; @@ -282,6 +325,8 @@ export class GatewayBrowserClient { void this.request("connect", params) .then((hello) => { + this.pendingDeviceTokenRetry = false; + this.deviceTokenRetryBudgetUsed = false; if (hello?.auth?.deviceToken && deviceIdentity) { storeDeviceAuthToken({ deviceId: deviceIdentity.deviceId, @@ -294,6 +339,33 @@ export class GatewayBrowserClient { this.opts.onHello?.(hello); }) .catch((err: unknown) => { + const connectErrorCode = + err instanceof GatewayRequestError ? resolveGatewayErrorDetailCode(err) : null; + const recoveryAdvice = + err instanceof GatewayRequestError ? readConnectErrorRecoveryAdvice(err.details) : {}; + const retryWithDeviceTokenRecommended = + recoveryAdvice.recommendedNextStep === "retry_with_device_token"; + const canRetryWithDeviceTokenHint = + recoveryAdvice.canRetryWithDeviceToken === true || + retryWithDeviceTokenRecommended || + connectErrorCode === ConnectErrorDetailCodes.AUTH_TOKEN_MISMATCH; + const shouldRetryWithDeviceToken = + !this.deviceTokenRetryBudgetUsed && + !deviceToken && + Boolean(explicitGatewayToken) && + Boolean(deviceIdentity) && + Boolean( + loadDeviceAuthToken({ + deviceId: deviceIdentity?.deviceId ?? "", + role, + })?.token, + ) && + canRetryWithDeviceTokenHint && + isTrustedRetryEndpoint(this.opts.url); + if (shouldRetryWithDeviceToken) { + this.pendingDeviceTokenRetry = true; + this.deviceTokenRetryBudgetUsed = true; + } if (err instanceof GatewayRequestError) { this.pendingConnectError = { code: err.gatewayCode, @@ -303,7 +375,11 @@ export class GatewayBrowserClient { } else { this.pendingConnectError = undefined; } - if (canFallbackToShared && deviceIdentity) { + if ( + canFallbackToShared && + deviceIdentity && + connectErrorCode === ConnectErrorDetailCodes.AUTH_DEVICE_TOKEN_MISMATCH + ) { clearDeviceAuthToken({ deviceId: deviceIdentity.deviceId, role }); } this.ws?.close(CONNECT_FAILED_CLOSE_CODE, "connect failed");