fix(discord): retry transient outbound failures

This commit is contained in:
Peter Steinberger
2026-05-02 02:46:58 +01:00
parent d961235a89
commit a6ccb5f698
6 changed files with 184 additions and 6 deletions

View File

@@ -5,6 +5,7 @@ import {
type RetryConfig,
} from "openclaw/plugin-sdk/retry-runtime";
import { resolveDiscordAccount } from "./accounts.js";
import { DiscordError } from "./internal/discord.js";
const DISCORD_DELIVERY_RETRY_DEFAULTS = {
attempts: 3,
@@ -13,7 +14,10 @@ const DISCORD_DELIVERY_RETRY_DEFAULTS = {
jitter: 0,
} satisfies Required<RetryConfig>;
function isRetryableDiscordDeliveryError(err: unknown): boolean {
/**
 * Decides whether a failed Discord delivery attempt should be retried.
 *
 * `DiscordError` instances are never retried here. Any other value is
 * retryable only when its `status` (or, when that is null/undefined, its
 * `statusCode`) is 429 or at least 500.
 *
 * @param err - The value thrown by the delivery attempt. It may be anything,
 *   including `null`, `undefined`, or a primitive.
 * @returns `true` when the error carries a retryable HTTP status.
 */
export function isRetryableDiscordDeliveryError(err: unknown): boolean {
  if (err instanceof DiscordError) {
    return false;
  }
  // `throw null` / `throw undefined` are legal; reading a property off either would itself throw.
  if (err === null || err === undefined) {
    return false;
  }
  const { status, statusCode } = err as { status?: unknown; statusCode?: unknown };
  const raw = status ?? statusCode;
  // Accept digit-only strings as well as numbers so "429" and "502" are classified like 429 and 502.
  const code = typeof raw === "string" && /^\d+$/.test(raw) ? Number(raw) : raw;
  return typeof code === "number" && (code === 429 || code >= 500);
}

View File

@@ -0,0 +1,83 @@
import { describe, expect, it, vi } from "vitest";
import { isRetryableDiscordDeliveryError } from "./delivery-retry.js";
import { DiscordError, RateLimitError } from "./internal/discord.js";
import { createDiscordRetryRunner, isRetryableDiscordTransientError } from "./retry.js";
// Retry settings for tests: two attempts with every delay and jitter value set to zero.
const ZERO_DELAY_RETRY = {
  attempts: 2,
  minDelayMs: 0,
  maxDelayMs: 0,
  jitter: 0,
};
/**
 * Test helper: builds a `RateLimitError` from a body-less 429 `Response` that
 * carries user-scope rate-limit headers, plus a non-global rate-limit body.
 *
 * @param retryAfter - Value stored in the body's `retry_after` field (default 0).
 * @returns The constructed `RateLimitError`.
 */
function createRateLimitError(retryAfter = 0): RateLimitError {
  type RateLimitBody = { message: string; retry_after: number; global: boolean };
  // The cast supplies a two-argument construct signature for the `new` call below.
  const Ctor = RateLimitError as unknown as new (
    response: Response,
    body: RateLimitBody,
  ) => RateLimitError;

  const headers = {
    "X-RateLimit-Scope": "user",
    "X-RateLimit-Bucket": "bucket-1",
  };
  const body: RateLimitBody = {
    message: "rate limited",
    retry_after: retryAfter,
    global: false,
  };
  return new Ctor(new Response(null, { status: 429, headers }), body);
}
describe("isRetryableDiscordTransientError", () => {
  // Each row is [label, thrown value]; the label fills the `%s` in the test title.
  const transientCases: Array<[string, unknown]> = [
    ["rate limit", createRateLimitError()],
    ["408 status", Object.assign(new Error("request timeout"), { status: 408 })],
    ["502 status", Object.assign(new Error("bad gateway"), { status: 502 })],
    ["503 statusCode", Object.assign(new Error("service unavailable"), { statusCode: 503 })],
    ["fetch failed", new TypeError("fetch failed")],
    ["ECONNRESET", Object.assign(new Error("socket hang up"), { code: "ECONNRESET" })],
    ["ETIMEDOUT cause", new Error("request failed", { cause: { code: "ETIMEDOUT" } })],
    ["abort", Object.assign(new Error("aborted"), { name: "AbortError" })],
  ];

  // Values that must be classified as permanent (not retried).
  const permanentCases: Array<[string, unknown]> = [
    ["400 status", Object.assign(new Error("bad request"), { status: 400 })],
    ["403 status", Object.assign(new Error("missing permissions"), { statusCode: 403 })],
    ["unknown channel", new Error("Unknown Channel")],
    ["plain string", "fetch failed"],
  ];

  it.each(transientCases)("retries %s", (_label, thrown) => {
    expect(isRetryableDiscordTransientError(thrown)).toBe(true);
  });

  it.each(permanentCases)("does not retry %s", (_label, thrown) => {
    expect(isRetryableDiscordTransientError(thrown)).toBe(false);
  });
});
describe("createDiscordRetryRunner", () => {
  it("retries transient transport errors", async () => {
    // First call rejects with a transport-style failure; every later call resolves.
    const operation = vi.fn();
    operation.mockRejectedValueOnce(new TypeError("fetch failed"));
    operation.mockResolvedValue("ok");
    const runWithRetry = createDiscordRetryRunner({ retry: ZERO_DELAY_RETRY });

    await expect(runWithRetry(operation, "send")).resolves.toBe("ok");
    expect(operation).toHaveBeenCalledTimes(2);
  });

  it("stops after configured transient retry attempts", async () => {
    // Every call rejects, so the runner must give up once the attempts are used up.
    const operation = vi.fn();
    operation.mockRejectedValue(new TypeError("fetch failed"));
    const runWithRetry = createDiscordRetryRunner({ retry: ZERO_DELAY_RETRY });

    await expect(runWithRetry(operation, "send")).rejects.toThrow("fetch failed");
    expect(operation).toHaveBeenCalledTimes(2);
  });
});
describe("isRetryableDiscordDeliveryError", () => {
  it("retries status-coded errors from injected delivery dependencies", () => {
    // A plain Error that only carries a 5xx `status` property.
    const badGateway = Object.assign(new Error("bad gateway"), { status: 502 });
    expect(isRetryableDiscordDeliveryError(badGateway)).toBe(true);
  });

  it("does not retry Discord client errors after the request runner handled them", () => {
    // Same 502 status, but wrapped in a DiscordError.
    const upstreamResponse = new Response("upstream", { status: 502 });
    const clientError = new DiscordError(upstreamResponse, { message: "Bad Gateway" });
    expect(isRetryableDiscordDeliveryError(clientError)).toBe(false);
  });
});

View File

@@ -1,3 +1,9 @@
import {
collectErrorGraphCandidates,
extractErrorCode,
formatErrorMessage,
readErrorName,
} from "openclaw/plugin-sdk/error-runtime";
import {
createRateLimitRetryRunner,
type RetryConfig,
@@ -12,6 +18,71 @@ const DISCORD_RETRY_DEFAULTS = {
jitter: 0.1,
} satisfies RetryConfig;
// HTTP statuses below 500 that are still treated as retryable.
const DISCORD_RETRYABLE_STATUS_CODES = new Set<number>([408, 429]);

// Error `code` values treated as retryable; lookups upper-case the code first.
const DISCORD_RETRYABLE_ERROR_CODES = new Set<string>(
  [
    "EAI_AGAIN",
    "ECONNREFUSED",
    "ECONNRESET",
    "ENETUNREACH",
    "ENOTFOUND",
    "EPIPE",
    "ETIMEDOUT",
    "UND_ERR_BODY_TIMEOUT",
    "UND_ERR_CONNECT_TIMEOUT",
    "UND_ERR_HEADERS_TIMEOUT",
    "UND_ERR_SOCKET",
  ],
);

// Case-insensitive match for message fragments treated as transient failures.
const DISCORD_TRANSIENT_MESSAGE_RE =
  /\b(?:bad gateway|fetch failed|network error|networkerror|service unavailable|socket hang up|temporarily unavailable|timed out|timeout)\b|connection (?:closed|reset|refused)/i;
/**
 * Reads an HTTP-style status code from an error-like value.
 *
 * Checks `status` first and `statusCode` second, returning the first one that
 * holds a finite number or a digit-only string. A `status` that is present but
 * unusable (for example `null`, `NaN`, or a non-numeric string) does not hide
 * a usable `statusCode`.
 *
 * @param err - Any thrown value.
 * @returns The numeric status, or `undefined` when `err` is not an object or
 *   neither property holds a usable value.
 */
function readDiscordErrorStatus(err: unknown): number | undefined {
  if (!err || typeof err !== "object") {
    return undefined;
  }
  const fields = err as { status?: unknown; statusCode?: unknown };
  // Properties are read lazily, in priority order, so `statusCode` is only touched when needed.
  for (const key of ["status", "statusCode"] as const) {
    const raw = fields[key];
    if (typeof raw === "number" && Number.isFinite(raw)) {
      return raw;
    }
    if (typeof raw === "string" && /^\d+$/.test(raw)) {
      return Number(raw);
    }
  }
  return undefined;
}
/**
 * Reports whether a failure looks transient and is therefore worth retrying.
 *
 * A `RateLimitError` is always retryable. Otherwise every candidate produced by
 * `collectErrorGraphCandidates` (given each node's `cause` and `error` as the
 * links to follow) is checked, and the first match wins:
 *  - a status in `DISCORD_RETRYABLE_STATUS_CODES` or at least 500,
 *  - an error code (upper-cased) in `DISCORD_RETRYABLE_ERROR_CODES`,
 *  - an error name of exactly "AbortError",
 *  - for object candidates only, a formatted message matching
 *    `DISCORD_TRANSIENT_MESSAGE_RE`.
 *
 * @param err - Any thrown value.
 * @returns `true` when any candidate matches one of the rules above.
 */
export function isRetryableDiscordTransientError(err: unknown): boolean {
  if (err instanceof RateLimitError) {
    return true;
  }

  const graph = collectErrorGraphCandidates(err, (current) => [current.cause, current.error]);
  for (const node of graph) {
    const httpStatus = readDiscordErrorStatus(node);
    if (
      httpStatus !== undefined &&
      (httpStatus >= 500 || DISCORD_RETRYABLE_STATUS_CODES.has(httpStatus))
    ) {
      return true;
    }

    const errorCode = extractErrorCode(node);
    if (errorCode && DISCORD_RETRYABLE_ERROR_CODES.has(errorCode.toUpperCase())) {
      return true;
    }

    if (readErrorName(node) === "AbortError") {
      return true;
    }

    // Primitive candidates (e.g. a bare string) are never message-matched.
    const isObjectLike = node instanceof Error || (node !== null && typeof node === "object");
    if (isObjectLike && DISCORD_TRANSIENT_MESSAGE_RE.test(formatErrorMessage(node))) {
      return true;
    }
  }

  return false;
}
export function createDiscordRetryRunner(params: {
retry?: RetryConfig;
configRetry?: RetryConfig;
@@ -21,7 +92,7 @@ export function createDiscordRetryRunner(params: {
...params,
defaults: DISCORD_RETRY_DEFAULTS,
logLabel: "discord",
shouldRetry: (err) => err instanceof RateLimitError,
shouldRetry: isRetryableDiscordTransientError,
retryAfterMs: (err) => (err instanceof RateLimitError ? err.retryAfter * 1000 : undefined),
});
}

View File

@@ -547,16 +547,33 @@ describe("retry rate limits", () => {
expect(postMock).toHaveBeenCalledTimes(2);
});
it("does not retry non-rate-limit errors", async () => {
it("does not retry permanent non-rate-limit errors", async () => {
const { rest, postMock } = makeDiscordRest();
postMock.mockRejectedValueOnce(new Error("network error"));
postMock.mockRejectedValueOnce(new Error("invalid request"));
await expect(
sendMessageDiscord("channel:789", "hello", discordClientOpts(rest)),
).rejects.toThrow("network error");
).rejects.toThrow("invalid request");
expect(postMock).toHaveBeenCalledTimes(1);
});
it("retries transient network errors", async () => {
  const { rest, postMock } = makeDiscordRest();
  // First POST rejects with a transport-style failure; the second one succeeds.
  postMock.mockRejectedValueOnce(new TypeError("fetch failed"));
  postMock.mockResolvedValueOnce({ id: "msg1", channel_id: "789" });
  // Two attempts, all delays zeroed out.
  const noDelayRetry = { attempts: 2, minDelayMs: 0, maxDelayMs: 0, jitter: 0 };

  const sent = await sendMessageDiscord("channel:789", "hello", {
    cfg: DISCORD_TEST_CFG,
    rest,
    token: "t",
    retry: noDelayRetry,
  });

  expect(sent).toEqual({ messageId: "msg1", channelId: "789" });
  expect(postMock).toHaveBeenCalledTimes(2);
});
it("retries reactions on rate limits", async () => {
const { rest, putMock } = makeDiscordRest();
const rateLimitError = createMockRateLimitError(0);