From ccb847e46f1bb533dc2f46cea61b65de6dc3308c Mon Sep 17 00:00:00 2001 From: scoootscooob <167050519+scoootscooob@users.noreply.github.com> Date: Sat, 2 May 2026 03:16:04 -0700 Subject: [PATCH] fix(gateway): make startup control-plane retries explicit * fix(gateway): make startup control-plane retries explicit * docs(changelog): note startup control-plane retry fix --- CHANGELOG.md | 1 + ...r-methods.control-plane-rate-limit.test.ts | 55 +++++++++++-------- src/gateway/server-methods.ts | 8 ++- .../server-startup-unavailable-methods.ts | 10 +++- 4 files changed, 47 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4da6c15f7e3..0659e9f98fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -231,6 +231,7 @@ Docs: https://docs.openclaw.ai - Diagnostics: reset stuck-session timers on reply, tool, status, block, and ACP progress events, and back off repeated `session.stuck` diagnostics while a session remains unchanged. Supersedes #72010. Thanks @rubencu. - Gateway/agents: avoid rebuilding core tools for plugin-only allowlists and keep the full plugin registry cache warm across scoped plugin loads, reducing per-turn latency spikes. Fixes #75882, #75907, #75906, #75887, and #75851. (#75922) Thanks @obviyus. - Agents/failover: classify bare `status: internal server error` provider messages as retryable server errors so model fallback can rotate instead of stopping. (#73844) Thanks @thesomewhatyou. +- Gateway/startup: return the shared retryable `startup-sidecars` error for startup-gated control-plane RPCs such as `sessions.create`, `sessions.send`, `sessions.abort`, `agent.wait`, and `tools.effective`, so clients can retry early sidecar races. (#76012) Thanks @scoootscooob.
## 2026.4.30 diff --git a/src/gateway/server-methods.control-plane-rate-limit.test.ts b/src/gateway/server-methods.control-plane-rate-limit.test.ts index c0cb7dce582..c5608041e29 100644 --- a/src/gateway/server-methods.control-plane-rate-limit.test.ts +++ b/src/gateway/server-methods.control-plane-rate-limit.test.ts @@ -3,8 +3,10 @@ import { __testing as controlPlaneRateLimitTesting, resolveControlPlaneRateLimitKey, } from "./control-plane-rate-limit.js"; +import { isRetryableGatewayStartupUnavailableError } from "./protocol/startup-unavailable.js"; import { handleGatewayRequest } from "./server-methods.js"; import type { GatewayRequestHandler } from "./server-methods/types.js"; +import { STARTUP_UNAVAILABLE_GATEWAY_METHODS } from "./server-startup-unavailable-methods.js"; const noWebchat = () => false; @@ -131,32 +133,37 @@ describe("gateway control-plane write rate limit", () => { expect(handlerCalls).toHaveBeenCalledTimes(4); }); - it("blocks startup-gated methods before dispatch", async () => { - const handlerCalls = vi.fn(); - const handler: GatewayRequestHandler = (opts) => { - handlerCalls(opts); - opts.respond(true, undefined, undefined); - }; - const context = { - ...buildContext(), - unavailableGatewayMethods: new Set(["chat.history", "models.list"]), - } as Parameters[0]["context"]; - const client = buildClient(); + it.each(STARTUP_UNAVAILABLE_GATEWAY_METHODS)( + "blocks startup-gated method %s before dispatch with a retryable startup error", + async (method) => { + const handlerCalls = vi.fn(); + const handler: GatewayRequestHandler = (opts) => { + handlerCalls(opts); + opts.respond(true, undefined, undefined); + }; + const context = { + ...buildContext(), + unavailableGatewayMethods: new Set(STARTUP_UNAVAILABLE_GATEWAY_METHODS), + } as Parameters[0]["context"]; + const client = buildClient(); - const blocked = await runRequest({ method: "models.list", context, client, handler }); + const blocked = await runRequest({ method, context, client, handler }); 
- expect(handlerCalls).not.toHaveBeenCalled(); - expect(blocked).toHaveBeenCalledWith( - false, - undefined, - expect.objectContaining({ - code: "UNAVAILABLE", - retryable: true, - retryAfterMs: 500, - details: { method: "models.list" }, - }), - ); - }); + expect(handlerCalls).not.toHaveBeenCalled(); + expect(blocked).toHaveBeenCalledWith( + false, + undefined, + expect.objectContaining({ + code: "UNAVAILABLE", + retryable: true, + retryAfterMs: 500, + details: { reason: "startup-sidecars", method }, + }), + ); + const error = blocked.mock.calls[0]?.[2]; + expect(isRetryableGatewayStartupUnavailableError(error)).toBe(true); + }, + ); it("uses connId fallback when both device and client IP are unknown", () => { const key = resolveControlPlaneRateLimitKey({ diff --git a/src/gateway/server-methods.ts b/src/gateway/server-methods.ts index 3ebc6d1b567..78a613a9188 100644 --- a/src/gateway/server-methods.ts +++ b/src/gateway/server-methods.ts @@ -3,6 +3,10 @@ import { formatControlPlaneActor, resolveControlPlaneActor } from "./control-pla import { consumeControlPlaneWriteBudget } from "./control-plane-rate-limit.js"; import { ADMIN_SCOPE, authorizeOperatorScopesForMethod } from "./method-scopes.js"; import { ErrorCodes, errorShape } from "./protocol/index.js"; +import { + gatewayStartupUnavailableDetails, + GATEWAY_STARTUP_RETRY_AFTER_MS, +} from "./protocol/startup-unavailable.js"; import { isRoleAuthorizedForMethod, parseGatewayRole } from "./role-policy.js"; import { agentHandlers } from "./server-methods/agent.js"; import { agentsHandlers } from "./server-methods/agents.js"; @@ -128,8 +132,8 @@ export async function handleGatewayRequest( undefined, errorShape(ErrorCodes.UNAVAILABLE, `${req.method} unavailable during gateway startup`, { retryable: true, - retryAfterMs: 500, - details: { method: req.method }, + retryAfterMs: GATEWAY_STARTUP_RETRY_AFTER_MS, + details: { ...gatewayStartupUnavailableDetails(), method: req.method }, }), ); return; diff --git 
a/src/gateway/server-startup-unavailable-methods.ts b/src/gateway/server-startup-unavailable-methods.ts index 8dee3394038..294b980486a 100644 --- a/src/gateway/server-startup-unavailable-methods.ts +++ b/src/gateway/server-startup-unavailable-methods.ts @@ -1 +1,9 @@ -export const STARTUP_UNAVAILABLE_GATEWAY_METHODS = ["chat.history", "models.list"] as const; +export const STARTUP_UNAVAILABLE_GATEWAY_METHODS = [ + "agent.wait", + "chat.history", + "models.list", + "sessions.abort", + "sessions.create", + "sessions.send", + "tools.effective", +] as const;