fix(gateway): make startup control-plane retries explicit

* fix(gateway): make startup control-plane retries explicit

* docs(changelog): note startup control-plane retry fix
This commit is contained in:
scoootscooob
2026-05-02 03:16:04 -07:00
committed by GitHub
parent ebc26a0bef
commit ccb847e46f
4 changed files with 47 additions and 27 deletions

View File

@@ -231,6 +231,7 @@ Docs: https://docs.openclaw.ai
- Diagnostics: reset stuck-session timers on reply, tool, status, block, and ACP progress events, and back off repeated `session.stuck` diagnostics while a session remains unchanged. Supersedes #72010. Thanks @rubencu.
- Gateway/agents: avoid rebuilding core tools for plugin-only allowlists and keep the full plugin registry cache warm across scoped plugin loads, reducing per-turn latency spikes. Fixes #75882, #75907, #75906, #75887, and #75851. (#75922) Thanks @obviyus.
- Agents/failover: classify bare `status: internal server error` provider messages as retryable server errors so model fallback can rotate instead of stopping. (#73844) Thanks @thesomewhatyou.
- Gateway/startup: return the shared retryable startup-sidecars error for startup-gated control-plane RPCs such as `sessions.create`, `sessions.send`, `sessions.abort`, `agent.wait`, and `tools.effective`, so clients can retry early sidecar races. (#76012) Thanks @scoootscooob.
## 2026.4.30

View File

@@ -3,8 +3,10 @@ import {
__testing as controlPlaneRateLimitTesting,
resolveControlPlaneRateLimitKey,
} from "./control-plane-rate-limit.js";
import { isRetryableGatewayStartupUnavailableError } from "./protocol/startup-unavailable.js";
import { handleGatewayRequest } from "./server-methods.js";
import type { GatewayRequestHandler } from "./server-methods/types.js";
import { STARTUP_UNAVAILABLE_GATEWAY_METHODS } from "./server-startup-unavailable-methods.js";
const noWebchat = () => false;
@@ -131,32 +133,37 @@ describe("gateway control-plane write rate limit", () => {
expect(handlerCalls).toHaveBeenCalledTimes(4);
});
it("blocks startup-gated methods before dispatch", async () => {
const handlerCalls = vi.fn();
const handler: GatewayRequestHandler = (opts) => {
handlerCalls(opts);
opts.respond(true, undefined, undefined);
};
const context = {
...buildContext(),
unavailableGatewayMethods: new Set(["chat.history", "models.list"]),
} as Parameters<typeof handleGatewayRequest>[0]["context"];
const client = buildClient();
it.each(STARTUP_UNAVAILABLE_GATEWAY_METHODS)(
"blocks startup-gated method %s before dispatch with a retryable startup error",
async (method) => {
const handlerCalls = vi.fn();
const handler: GatewayRequestHandler = (opts) => {
handlerCalls(opts);
opts.respond(true, undefined, undefined);
};
const context = {
...buildContext(),
unavailableGatewayMethods: new Set(STARTUP_UNAVAILABLE_GATEWAY_METHODS),
} as Parameters<typeof handleGatewayRequest>[0]["context"];
const client = buildClient();
const blocked = await runRequest({ method: "models.list", context, client, handler });
const blocked = await runRequest({ method, context, client, handler });
expect(handlerCalls).not.toHaveBeenCalled();
expect(blocked).toHaveBeenCalledWith(
false,
undefined,
expect.objectContaining({
code: "UNAVAILABLE",
retryable: true,
retryAfterMs: 500,
details: { method: "models.list" },
}),
);
});
expect(handlerCalls).not.toHaveBeenCalled();
expect(blocked).toHaveBeenCalledWith(
false,
undefined,
expect.objectContaining({
code: "UNAVAILABLE",
retryable: true,
retryAfterMs: 500,
details: { reason: "startup-sidecars", method },
}),
);
const error = blocked.mock.calls[0]?.[2];
expect(isRetryableGatewayStartupUnavailableError(error)).toBe(true);
},
);
it("uses connId fallback when both device and client IP are unknown", () => {
const key = resolveControlPlaneRateLimitKey({

View File

@@ -3,6 +3,10 @@ import { formatControlPlaneActor, resolveControlPlaneActor } from "./control-pla
import { consumeControlPlaneWriteBudget } from "./control-plane-rate-limit.js";
import { ADMIN_SCOPE, authorizeOperatorScopesForMethod } from "./method-scopes.js";
import { ErrorCodes, errorShape } from "./protocol/index.js";
import {
gatewayStartupUnavailableDetails,
GATEWAY_STARTUP_RETRY_AFTER_MS,
} from "./protocol/startup-unavailable.js";
import { isRoleAuthorizedForMethod, parseGatewayRole } from "./role-policy.js";
import { agentHandlers } from "./server-methods/agent.js";
import { agentsHandlers } from "./server-methods/agents.js";
@@ -128,8 +132,8 @@ export async function handleGatewayRequest(
undefined,
errorShape(ErrorCodes.UNAVAILABLE, `${req.method} unavailable during gateway startup`, {
retryable: true,
retryAfterMs: 500,
details: { method: req.method },
retryAfterMs: GATEWAY_STARTUP_RETRY_AFTER_MS,
details: { ...gatewayStartupUnavailableDetails(), method: req.method },
}),
);
return;

View File

@@ -1 +1,9 @@
export const STARTUP_UNAVAILABLE_GATEWAY_METHODS = ["chat.history", "models.list"] as const;
/**
 * Names of gateway RPC methods that are treated as startup-gated.
 *
 * The request-dispatch tests build the context's `unavailableGatewayMethods`
 * set from this list and expect every listed method to be rejected before its
 * handler runs, with a retryable `UNAVAILABLE` error.
 *
 * `as const` keeps each entry as a string-literal type, so the list can also
 * be used to derive a union of gated method names.
 *
 * NOTE(review): presumably the gateway installs this set only while startup
 * sidecars are still initializing — confirm against the server startup code.
 */
export const STARTUP_UNAVAILABLE_GATEWAY_METHODS = [
"agent.wait",
"chat.history",
"models.list",
"sessions.abort",
"sessions.create",
"sessions.send",
"tools.effective",
] as const;