fix(mattermost): address Greptile review - isRetryableError ordering and jitter

- Fix isRetryableError: check 5xx BEFORE 4xx to prevent misclassification
  when 5xx error detail contains 4xx substring (e.g., '503: upstream 404')
- Fix jitter: use proportional jitter (full-jitter pattern) instead of
  hardcoded 1000ms. Jitter is now 0-100% of exponential delay
- Update tests to reflect new jitter behavior
- Add test for 5xx with 4xx substring in error message
This commit is contained in:
Jonathan Jing
2026-03-10 10:57:07 -07:00
committed by Muhammed Mukhthar CM
parent be0e870d94
commit 4e6080d476
2 changed files with 40 additions and 14 deletions

View File

@@ -239,12 +239,12 @@ describe("createMattermostDirectChannelWithRetry", () => {
});
expect(delays).toHaveLength(2);
// First retry should be around initialDelayMs (100ms) + jitter
// First retry: exponentialDelay = 100ms, jitter = 0-100ms, total = 100-200ms
expect(delays[0]).toBeGreaterThanOrEqual(100);
expect(delays[0]).toBeLessThanOrEqual(1000);
// Second retry should be around initialDelayMs * 2 (200ms) + jitter
expect(delays[0]).toBeLessThanOrEqual(200);
// Second retry: exponentialDelay = 200ms, jitter = 0-200ms, total = 200-400ms
expect(delays[1]).toBeGreaterThanOrEqual(200);
expect(delays[1]).toBeLessThanOrEqual(1000);
expect(delays[1]).toBeLessThanOrEqual(400);
});
it("respects maxDelayMs cap", async () => {
@@ -344,4 +344,27 @@ describe("createMattermostDirectChannelWithRetry", () => {
expect(capturedSignal).toBeDefined();
expect(capturedSignal).toBeInstanceOf(AbortSignal);
});
it("retries on 5xx even if error message contains 4xx substring", async () => {
// This tests the fix for the ordering bug: 503 with "upstream 404" should be retried
mockFetch
.mockRejectedValueOnce(new Error("Mattermost API 503: upstream returned 404 Not Found"))
.mockResolvedValueOnce({
ok: true,
status: 201,
headers: new Headers({ "content-type": "application/json" }),
json: async () => ({ id: "dm-channel-5xx-with-404" }),
} as Response);
const client = createMockClient();
const result = await createMattermostDirectChannelWithRetry(client, ["user-1", "user-2"], {
maxRetries: 3,
initialDelayMs: 10,
});
// Should retry and succeed on second attempt
expect(mockFetch).toHaveBeenCalledTimes(2);
expect(result.id).toBe("dm-channel-5xx-with-404");
});
});

View File

@@ -235,11 +235,12 @@ export async function createMattermostDirectChannelWithRetry(
throw lastError;
}
// Calculate exponential backoff delay with jitter
const delayMs = Math.min(
initialDelayMs * Math.pow(2, attempt) + Math.random() * 1000,
maxDelayMs,
);
// Calculate exponential backoff delay with full-jitter
// Jitter is proportional to the exponential delay, not a fixed 1000ms
// This ensures backoff behaves correctly for small delay configurations
const exponentialDelay = initialDelayMs * Math.pow(2, attempt);
const jitter = Math.random() * exponentialDelay;
const delayMs = Math.min(exponentialDelay + jitter, maxDelayMs);
if (onRetry) {
onRetry(attempt + 1, delayMs, lastError);
@@ -261,6 +262,13 @@ function isRetryableError(error: Error): boolean {
return true;
}
// Retry on 5xx server errors FIRST (before checking 4xx)
// This prevents misclassification when a 5xx error detail contains a 4xx substring
// e.g., "503 Service Unavailable: upstream returned 404"
if (/\b5\d{2}\b/.test(message)) {
return true;
}
// Check for explicit 4xx status codes - these are client errors and should NOT be retried
// (except 429 which is handled above)
const clientErrorMatch = message.match(/\b4\d{2}\b/);
@@ -271,11 +279,6 @@ function isRetryableError(error: Error): boolean {
}
}
// Retry on 5xx server errors
if (/\b5\d{2}\b/.test(message)) {
return true;
}
// Retry on network/transient errors only if no explicit HTTP status code is present
// This avoids false positives like "400 Bad Request: connection timed out"
const hasExplicitStatusCode = /\b\d{3}\b/.test(message);