From 4e6080d4768d317ebb7ec303b8735aaf7239ceaf Mon Sep 17 00:00:00 2001 From: Jonathan Jing Date: Tue, 10 Mar 2026 10:57:07 -0700 Subject: [PATCH] fix(mattermost): address Greptile review - isRetryableError ordering and jitter - Fix isRetryableError: check 5xx BEFORE 4xx to prevent misclassification when 5xx error detail contains 4xx substring (e.g., '503: upstream 404') - Fix jitter: use proportional jitter (full-jitter pattern) instead of hardcoded 1000ms. Jitter is now 0-100% of exponential delay - Update tests to reflect new jitter behavior - Add test for 5xx with 4xx substring in error message --- .../src/mattermost/client.retry.test.ts | 31 ++++++++++++++++--- .../mattermost/src/mattermost/client.ts | 23 ++++++++------ 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/extensions/mattermost/src/mattermost/client.retry.test.ts b/extensions/mattermost/src/mattermost/client.retry.test.ts index 79a8f4c27cc..2762908e49e 100644 --- a/extensions/mattermost/src/mattermost/client.retry.test.ts +++ b/extensions/mattermost/src/mattermost/client.retry.test.ts @@ -239,12 +239,12 @@ describe("createMattermostDirectChannelWithRetry", () => { }); expect(delays).toHaveLength(2); - // First retry should be around initialDelayMs (100ms) + jitter + // First retry: exponentialDelay = 100ms, jitter = 0-100ms, total = 100-200ms expect(delays[0]).toBeGreaterThanOrEqual(100); - expect(delays[0]).toBeLessThanOrEqual(1000); - // Second retry should be around initialDelayMs * 2 (200ms) + jitter + expect(delays[0]).toBeLessThanOrEqual(200); + // Second retry: exponentialDelay = 200ms, jitter = 0-200ms, total = 200-400ms expect(delays[1]).toBeGreaterThanOrEqual(200); - expect(delays[1]).toBeLessThanOrEqual(1000); + expect(delays[1]).toBeLessThanOrEqual(400); }); it("respects maxDelayMs cap", async () => { @@ -344,4 +344,27 @@ describe("createMattermostDirectChannelWithRetry", () => { expect(capturedSignal).toBeDefined(); expect(capturedSignal).toBeInstanceOf(AbortSignal); }); + + it("retries on 5xx even if error message contains 4xx substring", async () => { + // This tests the fix for the ordering bug: 503 with "upstream 404" should be retried + mockFetch + .mockRejectedValueOnce(new Error("Mattermost API 503: upstream returned 404 Not Found")) + .mockResolvedValueOnce({ + ok: true, + status: 201, + headers: new Headers({ "content-type": "application/json" }), + json: async () => ({ id: "dm-channel-5xx-with-404" }), + } as Response); + + const client = createMockClient(); + + const result = await createMattermostDirectChannelWithRetry(client, ["user-1", "user-2"], { + maxRetries: 3, + initialDelayMs: 10, + }); + + // Should retry and succeed on second attempt + expect(mockFetch).toHaveBeenCalledTimes(2); + expect(result.id).toBe("dm-channel-5xx-with-404"); + }); }); diff --git a/extensions/mattermost/src/mattermost/client.ts b/extensions/mattermost/src/mattermost/client.ts index 524ff0cfba7..8a9d86557a0 100644 --- a/extensions/mattermost/src/mattermost/client.ts +++ b/extensions/mattermost/src/mattermost/client.ts @@ -235,11 +235,12 @@ export async function createMattermostDirectChannelWithRetry( throw lastError; } - // Calculate exponential backoff delay with jitter - const delayMs = Math.min( - initialDelayMs * Math.pow(2, attempt) + Math.random() * 1000, - maxDelayMs, - ); + // Calculate exponential backoff delay with full-jitter + // Jitter is proportional to the exponential delay, not a fixed 1000ms + // This ensures backoff behaves correctly for small delay configurations + const exponentialDelay = initialDelayMs * Math.pow(2, attempt); + const jitter = Math.random() * exponentialDelay; + const delayMs = Math.min(exponentialDelay + jitter, maxDelayMs); if (onRetry) { onRetry(attempt + 1, delayMs, lastError); @@ -261,6 +262,13 @@ function isRetryableError(error: Error): boolean { return true; } + // Retry on 5xx server errors FIRST (before checking 4xx) + // This prevents misclassification when a 5xx error detail contains a 4xx substring + // e.g., "503 Service Unavailable: upstream returned 404" + if (/\b5\d{2}\b/.test(message)) { + return true; + } + // Check for explicit 4xx status codes - these are client errors and should NOT be retried // (except 429 which is handled above) const clientErrorMatch = message.match(/\b4\d{2}\b/); @@ -271,11 +279,6 @@ function isRetryableError(error: Error): boolean { } } - // Retry on 5xx server errors - if (/\b5\d{2}\b/.test(message)) { - return true; - } - // Retry on network/transient errors only if no explicit HTTP status code is present // This avoids false positives like "400 Bad Request: connection timed out" const hasExplicitStatusCode = /\b\d{3}\b/.test(message);