mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-21 06:51:01 +00:00
fix(mattermost): harden DM channel retry handling
This commit is contained in:
@@ -133,6 +133,7 @@ Docs: https://docs.openclaw.ai
|
||||
- macOS/launch at login: stop emitting `KeepAlive` for the desktop app launch agent so OpenClaw no longer relaunches immediately after a manual quit while launch at login remains enabled. (#40213) Thanks @stablegenius49.
|
||||
- ACP/gateway startup: use direct Telegram and Discord startup/status helpers instead of routing probes through the plugin runtime, and prepend the selected daemon Node bin dir to service PATH so plugin-local installs can still find `npm` and `pnpm`.
|
||||
- ACP/configured bindings: reinitialize configured ACP sessions that are stuck in `error` state instead of reusing the failed runtime.
|
||||
- Mattermost/DM send: retry transient direct-channel creation failures for DM deliveries, with configurable backoff and per-request timeout. (#42398) Thanks @JonathanJing.
|
||||
|
||||
## 2026.3.13
|
||||
|
||||
|
||||
@@ -191,6 +191,35 @@ OpenClaw resolves them **user-first**:
|
||||
|
||||
If you need deterministic behavior, always use the explicit prefixes (`user:<id>` / `channel:<id>`).
|
||||
|
||||
## DM channel retry
|
||||
|
||||
When OpenClaw sends to a Mattermost DM target and needs to resolve the direct channel first, it
|
||||
retries transient direct-channel creation failures by default.
|
||||
|
||||
Use `channels.mattermost.dmChannelRetry` to tune that behavior globally for the Mattermost plugin,
|
||||
or `channels.mattermost.accounts.<id>.dmChannelRetry` for one account.
|
||||
|
||||
```json5
|
||||
{
|
||||
channels: {
|
||||
mattermost: {
|
||||
dmChannelRetry: {
|
||||
maxRetries: 3,
|
||||
initialDelayMs: 1000,
|
||||
maxDelayMs: 10000,
|
||||
timeoutMs: 30000,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- This applies only to DM channel creation (`/api/v4/channels/direct`), not every Mattermost API call.
|
||||
- Retries apply to transient failures such as rate limits, 5xx responses, and network or timeout errors.
|
||||
- 4xx client errors other than `429` are treated as permanent and are not retried.
|
||||
|
||||
## Reactions (message tool)
|
||||
|
||||
- Use `message action=react` with `channel=mattermost`.
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
import { describe, expect, it, vi, beforeEach } from "vitest";
|
||||
import {
|
||||
createMattermostClient,
|
||||
createMattermostDirectChannelWithRetry,
|
||||
type CreateDmChannelRetryOptions,
|
||||
} from "./client.js";
|
||||
import { createMattermostClient, createMattermostDirectChannelWithRetry } from "./client.js";
|
||||
|
||||
describe("createMattermostDirectChannelWithRetry", () => {
|
||||
const mockFetch = vi.fn();
|
||||
@@ -20,6 +16,13 @@ describe("createMattermostDirectChannelWithRetry", () => {
|
||||
});
|
||||
}
|
||||
|
||||
function createFetchFailedError(params: { message: string; code?: string }): TypeError {
|
||||
const cause = Object.assign(new Error(params.message), {
|
||||
code: params.code,
|
||||
});
|
||||
return Object.assign(new TypeError("fetch failed"), { cause });
|
||||
}
|
||||
|
||||
it("succeeds on first attempt without retries", async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
@@ -178,6 +181,32 @@ describe("createMattermostDirectChannelWithRetry", () => {
|
||||
expect(mockFetch).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it("retries on fetch failed errors when the cause carries a transient code", async () => {
|
||||
mockFetch
|
||||
.mockRejectedValueOnce(
|
||||
createFetchFailedError({
|
||||
message: "connect ECONNREFUSED 127.0.0.1:81",
|
||||
code: "ECONNREFUSED",
|
||||
}),
|
||||
)
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
status: 201,
|
||||
headers: new Headers({ "content-type": "application/json" }),
|
||||
json: async () => ({ id: "dm-channel-fetch-failed" }),
|
||||
} as Response);
|
||||
|
||||
const client = createMockClient();
|
||||
|
||||
const result = await createMattermostDirectChannelWithRetry(client, ["user-1", "user-2"], {
|
||||
maxRetries: 3,
|
||||
initialDelayMs: 10,
|
||||
});
|
||||
|
||||
expect(result.id).toBe("dm-channel-fetch-failed");
|
||||
expect(mockFetch).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it("does not retry on 4xx client errors (except 429)", async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
|
||||
@@ -190,6 +190,47 @@ export type CreateDmChannelRetryOptions = {
|
||||
onRetry?: (attempt: number, delayMs: number, error: Error) => void;
|
||||
};
|
||||
|
||||
const RETRYABLE_NETWORK_ERROR_CODES = new Set([
|
||||
"ECONNRESET",
|
||||
"ECONNREFUSED",
|
||||
"ETIMEDOUT",
|
||||
"ESOCKETTIMEDOUT",
|
||||
"ECONNABORTED",
|
||||
"ENOTFOUND",
|
||||
"EAI_AGAIN",
|
||||
"EHOSTUNREACH",
|
||||
"ENETUNREACH",
|
||||
"EPIPE",
|
||||
"UND_ERR_CONNECT_TIMEOUT",
|
||||
"UND_ERR_DNS_RESOLVE_FAILED",
|
||||
"UND_ERR_CONNECT",
|
||||
"UND_ERR_SOCKET",
|
||||
"UND_ERR_HEADERS_TIMEOUT",
|
||||
"UND_ERR_BODY_TIMEOUT",
|
||||
]);
|
||||
|
||||
const RETRYABLE_NETWORK_ERROR_NAMES = new Set([
|
||||
"AbortError",
|
||||
"TimeoutError",
|
||||
"ConnectTimeoutError",
|
||||
"HeadersTimeoutError",
|
||||
"BodyTimeoutError",
|
||||
]);
|
||||
|
||||
const RETRYABLE_NETWORK_MESSAGE_SNIPPETS = [
|
||||
"network error",
|
||||
"timeout",
|
||||
"timed out",
|
||||
"abort",
|
||||
"connection refused",
|
||||
"econnreset",
|
||||
"econnrefused",
|
||||
"etimedout",
|
||||
"enotfound",
|
||||
"socket hang up",
|
||||
"getaddrinfo",
|
||||
];
|
||||
|
||||
/**
|
||||
* Creates a Mattermost DM channel with exponential backoff retry logic.
|
||||
* Retries on transient errors (429, 5xx, network errors) but not on
|
||||
@@ -255,27 +296,37 @@ export async function createMattermostDirectChannelWithRetry(
|
||||
}
|
||||
|
||||
function isRetryableError(error: Error): boolean {
|
||||
const message = error.message.toLowerCase();
|
||||
const candidates = collectErrorCandidates(error);
|
||||
const messages = candidates
|
||||
.map((candidate) => readErrorMessage(candidate)?.toLowerCase())
|
||||
.filter((message): message is string => Boolean(message));
|
||||
|
||||
// Retry on 5xx server errors FIRST (before checking 4xx)
|
||||
// Use "mattermost api" prefix to avoid matching port numbers (e.g., :443) or IP octets
|
||||
// This prevents misclassification when a 5xx error detail contains a 4xx substring
|
||||
// e.g., "Mattermost API 503: upstream returned 404"
|
||||
if (/mattermost api 5\d{2}\b/.test(message)) {
|
||||
if (messages.some((message) => /mattermost api 5\d{2}\b/.test(message))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for explicit 429 rate limiting FIRST (before generic "429" text match)
|
||||
// This avoids retrying when error detail contains "429" but it's not the status code
|
||||
if (/mattermost api 429\b/.test(message) || message.includes("too many requests")) {
|
||||
if (
|
||||
messages.some(
|
||||
(message) => /mattermost api 429\b/.test(message) || message.includes("too many requests"),
|
||||
)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for explicit 4xx status codes - these are client errors and should NOT be retried
|
||||
// (except 429 which is handled above)
|
||||
// Use "mattermost api" prefix to avoid matching port numbers like :443
|
||||
const clientErrorMatch = message.match(/mattermost api (4\d{2})\b/);
|
||||
if (clientErrorMatch) {
|
||||
for (const message of messages) {
|
||||
const clientErrorMatch = message.match(/mattermost api (4\d{2})\b/);
|
||||
if (!clientErrorMatch) {
|
||||
continue;
|
||||
}
|
||||
const statusCode = parseInt(clientErrorMatch[1], 10);
|
||||
if (statusCode >= 400 && statusCode < 500) {
|
||||
return false;
|
||||
@@ -286,25 +337,95 @@ function isRetryableError(error: Error): boolean {
|
||||
// This avoids false positives like:
|
||||
// - "400 Bad Request: connection timed out" (has status code)
|
||||
// - "connect ECONNRESET 104.18.32.10:443" (has port number, not status)
|
||||
const hasMattermostApiStatusCode = /mattermost api \d{3}\b/.test(message);
|
||||
const hasMattermostApiStatusCode = messages.some((message) =>
|
||||
/mattermost api \d{3}\b/.test(message),
|
||||
);
|
||||
if (hasMattermostApiStatusCode) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Retry on network/transient errors (no explicit HTTP status code in message)
|
||||
const retryablePatterns = [
|
||||
"network error",
|
||||
"timeout",
|
||||
"abort",
|
||||
"connection refused",
|
||||
"econnreset",
|
||||
"econnrefused",
|
||||
"etimedout",
|
||||
"enotfound",
|
||||
"socket hang up",
|
||||
];
|
||||
const codes = candidates
|
||||
.map((candidate) => readErrorCode(candidate))
|
||||
.filter((code): code is string => Boolean(code));
|
||||
if (codes.some((code) => RETRYABLE_NETWORK_ERROR_CODES.has(code))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return retryablePatterns.some((pattern) => message.includes(pattern));
|
||||
const names = candidates
|
||||
.map((candidate) => readErrorName(candidate))
|
||||
.filter((name): name is string => Boolean(name));
|
||||
if (names.some((name) => RETRYABLE_NETWORK_ERROR_NAMES.has(name))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return messages.some((message) =>
|
||||
RETRYABLE_NETWORK_MESSAGE_SNIPPETS.some((pattern) => message.includes(pattern)),
|
||||
);
|
||||
}
|
||||
|
||||
function collectErrorCandidates(error: unknown): unknown[] {
|
||||
const queue: unknown[] = [error];
|
||||
const seen = new Set<unknown>();
|
||||
const candidates: unknown[] = [];
|
||||
|
||||
while (queue.length > 0) {
|
||||
const current = queue.shift();
|
||||
if (!current || seen.has(current)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(current);
|
||||
candidates.push(current);
|
||||
|
||||
if (typeof current !== "object") {
|
||||
continue;
|
||||
}
|
||||
|
||||
const nested = current as {
|
||||
cause?: unknown;
|
||||
reason?: unknown;
|
||||
errors?: unknown;
|
||||
};
|
||||
queue.push(nested.cause, nested.reason);
|
||||
if (Array.isArray(nested.errors)) {
|
||||
queue.push(...nested.errors);
|
||||
}
|
||||
}
|
||||
|
||||
return candidates;
|
||||
}
|
||||
|
||||
function readErrorMessage(error: unknown): string | undefined {
|
||||
if (!error || typeof error !== "object") {
|
||||
return undefined;
|
||||
}
|
||||
const message = (error as { message?: unknown }).message;
|
||||
return typeof message === "string" && message.trim() ? message : undefined;
|
||||
}
|
||||
|
||||
function readErrorName(error: unknown): string | undefined {
|
||||
if (!error || typeof error !== "object") {
|
||||
return undefined;
|
||||
}
|
||||
const name = (error as { name?: unknown }).name;
|
||||
return typeof name === "string" && name.trim() ? name : undefined;
|
||||
}
|
||||
|
||||
function readErrorCode(error: unknown): string | undefined {
|
||||
if (!error || typeof error !== "object") {
|
||||
return undefined;
|
||||
}
|
||||
const { code, errno } = error as {
|
||||
code?: unknown;
|
||||
errno?: unknown;
|
||||
};
|
||||
const raw = typeof code === "string" && code.trim() ? code : errno;
|
||||
if (typeof raw === "string" && raw.trim()) {
|
||||
return raw.trim().toUpperCase();
|
||||
}
|
||||
if (typeof raw === "number" && Number.isFinite(raw)) {
|
||||
return String(raw);
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
|
||||
@@ -541,7 +541,7 @@ describe("sendMessageMattermost user-first resolution", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("opts.dmRetryOptions overrides account config dmChannelRetry", async () => {
|
||||
it("opts.dmRetryOptions overrides provided fields and preserves account defaults", async () => {
|
||||
const userId = "hhhhhh8888888888hhhhhh8888"; // 26 chars
|
||||
mockState.resolveMattermostAccount.mockReturnValue({
|
||||
accountId: "default",
|
||||
@@ -570,5 +570,12 @@ describe("sendMessageMattermost user-first resolution", () => {
|
||||
["bot-id", userId],
|
||||
expect.objectContaining(overrideOptions),
|
||||
);
|
||||
expect(mockState.createMattermostDirectChannelWithRetry).toHaveBeenCalledWith(
|
||||
{},
|
||||
["bot-id", userId],
|
||||
expect.objectContaining({
|
||||
initialDelayMs: 1000,
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -193,6 +193,31 @@ type ResolveTargetChannelIdParams = {
|
||||
logger?: { debug?: (msg: string) => void; warn?: (msg: string) => void };
|
||||
};
|
||||
|
||||
function mergeDmRetryOptions(
|
||||
base?: CreateDmChannelRetryOptions,
|
||||
override?: CreateDmChannelRetryOptions,
|
||||
): CreateDmChannelRetryOptions | undefined {
|
||||
const merged: CreateDmChannelRetryOptions = {
|
||||
maxRetries: override?.maxRetries ?? base?.maxRetries,
|
||||
initialDelayMs: override?.initialDelayMs ?? base?.initialDelayMs,
|
||||
maxDelayMs: override?.maxDelayMs ?? base?.maxDelayMs,
|
||||
timeoutMs: override?.timeoutMs ?? base?.timeoutMs,
|
||||
onRetry: override?.onRetry,
|
||||
};
|
||||
|
||||
if (
|
||||
merged.maxRetries === undefined &&
|
||||
merged.initialDelayMs === undefined &&
|
||||
merged.maxDelayMs === undefined &&
|
||||
merged.timeoutMs === undefined &&
|
||||
merged.onRetry === undefined
|
||||
) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
async function resolveTargetChannelId(params: ResolveTargetChannelIdParams): Promise<string> {
|
||||
if (params.target.kind === "channel") {
|
||||
return params.target.id;
|
||||
@@ -284,17 +309,15 @@ async function resolveMattermostSendContext(
|
||||
? { kind: "channel" as const, id: opaqueTarget.id }
|
||||
: parseMattermostTarget(trimmedTo);
|
||||
// Build retry options from account config, allowing opts to override
|
||||
const accountRetryConfig = account.config.dmChannelRetry;
|
||||
const dmRetryOptions: CreateDmChannelRetryOptions | undefined =
|
||||
opts.dmRetryOptions ??
|
||||
(accountRetryConfig
|
||||
? {
|
||||
maxRetries: accountRetryConfig.maxRetries,
|
||||
initialDelayMs: accountRetryConfig.initialDelayMs,
|
||||
maxDelayMs: accountRetryConfig.maxDelayMs,
|
||||
timeoutMs: accountRetryConfig.timeoutMs,
|
||||
}
|
||||
: undefined);
|
||||
const accountRetryConfig: CreateDmChannelRetryOptions | undefined = account.config.dmChannelRetry
|
||||
? {
|
||||
maxRetries: account.config.dmChannelRetry.maxRetries,
|
||||
initialDelayMs: account.config.dmChannelRetry.initialDelayMs,
|
||||
maxDelayMs: account.config.dmChannelRetry.maxDelayMs,
|
||||
timeoutMs: account.config.dmChannelRetry.timeoutMs,
|
||||
}
|
||||
: undefined;
|
||||
const dmRetryOptions = mergeDmRetryOptions(accountRetryConfig, opts.dmRetryOptions);
|
||||
|
||||
const channelId = await resolveTargetChannelId({
|
||||
target,
|
||||
|
||||
Reference in New Issue
Block a user