fix(gateway): retry startup handshakes before surfacing failures

This commit is contained in:
Peter Steinberger
2026-04-29 10:25:40 +01:00
parent a98a4e6ca5
commit 8d58ad4c15
14 changed files with 430 additions and 7 deletions

View File

@@ -38,6 +38,7 @@ class MockWebSocket {
};
readonly sent: string[] = [];
lastClose: { code?: number; reason?: string } | null = null;
readyState = MockWebSocket.OPEN;
constructor(_url: string) {
@@ -52,7 +53,8 @@ class MockWebSocket {
this.sent.push(data);
}
close() {
close(code?: number, reason?: string) {
this.lastClose = { code, reason };
this.readyState = 3;
}
@@ -355,6 +357,47 @@ describe("GatewayBrowserClient", () => {
vi.useRealTimers();
});
// Startup-unavailable handshake: when the connect request is rejected with a
// retryable UNAVAILABLE error, the client should close the socket with the
// startup-retry close code, skip the onClose callback, and reconnect only
// after the server-supplied retryAfterMs has elapsed.
it("retries startup-unavailable connect responses without terminal callbacks", async () => {
// Fake timers let the test step through the 250 ms retry delay deterministically.
vi.useFakeTimers();
const onClose = vi.fn();
const client = new GatewayBrowserClient({
url: "ws://127.0.0.1:18789",
token: "shared-auth-token",
onClose,
});
try {
const { ws, connectFrame } = await startConnect(client);
// Answer the client's connect frame with a failed response that is marked
// retryable and carries an explicit retry delay.
ws.emitMessage({
type: "res",
id: connectFrame.id,
ok: false,
error: {
code: "UNAVAILABLE",
message: "gateway starting; retry shortly",
details: { reason: "startup-sidecars" },
retryable: true,
retryAfterMs: 250,
},
});
// Advance by 0 ms so pending async work can run without moving the clock
// towards the retry deadline.
await vi.advanceTimersByTimeAsync(0);
await expectSocketClosed(ws);
// The client itself must have closed the socket with the startup-retry code/reason.
expect(ws.lastClose).toEqual({ code: 4013, reason: "gateway starting" });
// Simulate the close event that follows the client's close() call.
ws.emitClose(4013, "gateway starting");
// A startup retry must not be reported through onClose.
expect(onClose).not.toHaveBeenCalled();
// NOTE(review): wsInstances presumably records every MockWebSocket constructed — confirm.
// No second socket may appear before the full 250 ms delay has passed.
expect(wsInstances).toHaveLength(1);
await vi.advanceTimersByTimeAsync(249);
expect(wsInstances).toHaveLength(1);
// The final millisecond reaches retryAfterMs; a second socket is expected now.
await vi.advanceTimersByTimeAsync(1);
expect(wsInstances).toHaveLength(2);
} finally {
// Always stop the client and restore real timers, even if an assertion failed.
client.stop();
vi.useRealTimers();
}
});
it("treats IPv6 loopback as trusted for bounded device-token retry", async () => {
vi.useFakeTimers();
const { client } = await startRetriedDeviceTokenConnect({

View File

@@ -11,6 +11,10 @@ import {
readConnectErrorRecoveryAdvice,
readConnectErrorDetailCode,
} from "../../../src/gateway/protocol/connect-error-details.js";
import {
isRetryableGatewayStartupUnavailableError,
resolveGatewayStartupRetryAfterMs,
} from "../../../src/gateway/protocol/startup-unavailable.js";
import { clearDeviceAuthToken, loadDeviceAuthToken, storeDeviceAuthToken } from "./device-auth.ts";
import { loadOrCreateDeviceIdentity, signDevicePayload } from "./device-identity.ts";
import { generateUUID } from "./uuid.ts";
@@ -228,6 +232,7 @@ export type GatewayEventListener = (evt: GatewayEventFrame) => void;
// 4008 = application-defined code (browser rejects 1008 "Policy Violation")
const CONNECT_FAILED_CLOSE_CODE = 4008;
// 4013 = application-defined code the client passes to ws.close() (with reason
// "gateway starting") when the connect error is a retryable
// gateway-startup-unavailable error, instead of the generic connect-failed code.
const STARTUP_RETRY_CLOSE_CODE = 4013;
function buildGatewayConnectAuth(
selectedAuth: SelectedConnectAuth,
@@ -302,6 +307,7 @@ export class GatewayBrowserClient {
private pendingConnectError: GatewayErrorInfo | undefined;
private pendingDeviceTokenRetry = false;
private deviceTokenRetryBudgetUsed = false;
private pendingStartupReconnectDelayMs: number | null = null;
private eventListeners = new Set<GatewayEventListener>();
constructor(private opts: GatewayBrowserClientOptions) {}
@@ -319,6 +325,7 @@ export class GatewayBrowserClient {
this.pendingConnectError = undefined;
this.pendingDeviceTokenRetry = false;
this.deviceTokenRetryBudgetUsed = false;
this.pendingStartupReconnectDelayMs = null;
this.flushPending(new Error("gateway client stopped"));
}
@@ -348,6 +355,11 @@ export class GatewayBrowserClient {
const connectError = this.pendingConnectError;
this.pendingConnectError = undefined;
this.ws = null;
if (this.pendingStartupReconnectDelayMs !== null) {
this.flushPending(new Error(`gateway closed (${ev.code}): ${reason}`));
this.scheduleReconnect();
return;
}
this.flushPending(new Error(`gateway closed (${ev.code}): ${reason}`));
this.opts.onClose?.({ code: ev.code, reason, error: connectError });
const connectErrorCode = resolveGatewayErrorDetailCode(connectError);
@@ -371,8 +383,12 @@ export class GatewayBrowserClient {
if (this.closed) {
return;
}
const delay = this.backoffMs;
this.backoffMs = Math.min(this.backoffMs * 1.7, 15_000);
const startupDelay = this.pendingStartupReconnectDelayMs;
this.pendingStartupReconnectDelayMs = null;
const delay = startupDelay ?? this.backoffMs;
if (startupDelay === null) {
this.backoffMs = Math.min(this.backoffMs * 1.7, 15_000);
}
this.clearConnectTimer();
this.connectTimer = window.setTimeout(() => {
this.connectTimer = null;
@@ -468,6 +484,7 @@ export class GatewayBrowserClient {
}
this.pendingDeviceTokenRetry = false;
this.deviceTokenRetryBudgetUsed = false;
this.pendingStartupReconnectDelayMs = null;
if (hello?.auth?.deviceToken && plan.deviceIdentity) {
storeDeviceAuthToken({
deviceId: plan.deviceIdentity.deviceId,
@@ -531,6 +548,14 @@ export class GatewayBrowserClient {
) {
clearDeviceAuthToken({ deviceId: plan.deviceIdentity.deviceId, role: plan.role });
}
const startupRetryAfterMs = resolveGatewayStartupRetryAfterMs(err);
if (startupRetryAfterMs !== null) {
this.pendingStartupReconnectDelayMs = startupRetryAfterMs;
}
if (isRetryableGatewayStartupUnavailableError(err)) {
ws.close(STARTUP_RETRY_CLOSE_CODE, "gateway starting");
return;
}
ws.close(CONNECT_FAILED_CLOSE_CODE, "connect failed");
}