diff --git a/CHANGELOG.md b/CHANGELOG.md index 4678b085f7e..a1b535e6ff2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai - Gateway/chat: keep duplicate attachment-backed `chat.send` retries with the same idempotency key on the documented in-flight path so aborts still target the real active run. Fixes #70139. Thanks @Feelw00. - Plugins: share package entrypoint resolution between install and discovery, reject mismatched `runtimeExtensions`, and cache bundled runtime-dependency manifest reads during scans. Thanks @codex. - WhatsApp/Web: keep quiet but healthy linked-device sessions connected by basing the watchdog on WhatsApp Web transport activity, while retaining a longer app-silence cap so frame activity cannot mask a stuck session forever. Fixes #70678; carries forward the focused #71466 approach and keeps #63939 as related configurable-timeout follow-up. Thanks @vincentkoc and @oromeis. +- Discord/gateway: count failed health-monitor restart attempts toward cooldown and hourly caps, and evict stale account lifecycle state during channel reloads so repeated Discord gateway recovery cannot loop on old status. Fixes #38596. (#40413) Thanks @jellyAI-dev and @vashquez. 
## 2026.4.26 diff --git a/src/gateway/channel-health-monitor.test.ts b/src/gateway/channel-health-monitor.test.ts index 14fd42c4c1a..a1038f747b6 100644 --- a/src/gateway/channel-health-monitor.test.ts +++ b/src/gateway/channel-health-monitor.test.ts @@ -437,6 +437,31 @@ describe("channel-health-monitor", () => { monitor.stop(); }); + it("counts failed restart attempts toward cooldown and hourly caps", async () => { + const manager = createSnapshotManager( + { + discord: { + default: managedStoppedAccount("keeps crashing"), + }, + }, + { + /* every start attempt rejects, so any restart the monitor tries is a failed one */ startChannel: vi.fn(async () => { + throw new Error("startup failed"); + }), + }, + ); + const monitor = startDefaultMonitor(manager, { + checkIntervalMs: 1_000, + cooldownCycles: 1, + maxRestartsPerHour: 1, + }); + + /* 5_001 ms spans several 1_000 ms check intervals */ await vi.advanceTimersByTimeAsync(5_001); + + /* with maxRestartsPerHour: 1 the failed attempt must use up the budget, so no second start is expected */ expect(manager.startChannel).toHaveBeenCalledTimes(1); + monitor.stop(); + }); + it("runs checks single-flight when restart work is still in progress", async () => { let releaseStart: (() => void) | undefined; const startGate = new Promise((resolve) => { diff --git a/src/gateway/channel-health-monitor.ts b/src/gateway/channel-health-monitor.ts index 90bf3851a3a..a469723f330 100644 --- a/src/gateway/channel-health-monitor.ts +++ b/src/gateway/channel-health-monitor.ts @@ -157,15 +157,16 @@ export function startChannelHealthMonitor(deps: ChannelHealthMonitorDeps): Chann log.info?.(`[${channelId}:${accountId}] health-monitor: restarting (reason: ${reason})`); + /* Record the attempt before stop/start so that a throw inside the try below is still written to the restart record (lastRestartAt and restartsThisHour). */ record.lastRestartAt = now; + record.restartsThisHour.push({ at: now }); + restartRecords.set(key, record); + try { if (status.running) { await channelManager.stopChannel(channelId as ChannelId, accountId); } channelManager.resetRestartAttempts(channelId as ChannelId, accountId); await channelManager.startChannel(channelId as ChannelId, accountId); - record.lastRestartAt = now; - record.restartsThisHour.push({ at: now }); - restartRecords.set(key, record); } catch (err) { log.error?.( `[${channelId}:${accountId}] 
health-monitor: restart failed: ${String(err)}`, diff --git a/src/gateway/server-channels.test.ts b/src/gateway/server-channels.test.ts index 25eaad2e9b6..01ec8e4fdb8 100644 --- a/src/gateway/server-channels.test.ts +++ b/src/gateway/server-channels.test.ts @@ -50,6 +50,7 @@ function createTestPlugin(params?: { order?: number; account?: TestAccount; startAccount?: NonNullable<ChannelPlugin["gateway"]>["startAccount"]; + listAccountIds?: ChannelPlugin["config"]["listAccountIds"]; includeDescribeAccount?: boolean; describeAccount?: ChannelPlugin["config"]["describeAccount"]; resolveAccount?: ChannelPlugin["config"]["resolveAccount"]; @@ -59,7 +60,7 @@ function createTestPlugin(params?: { const account = params?.account ?? { enabled: true, configured: true }; const includeDescribeAccount = params?.includeDescribeAccount !== false; const config: ChannelPlugin["config"] = { - listAccountIds: () => [DEFAULT_ACCOUNT_ID], + listAccountIds: params?.listAccountIds ?? (() => [DEFAULT_ACCOUNT_ID]), resolveAccount: params?.resolveAccount ?? (() => account), isEnabled: (resolved) => resolved.enabled !== false, ...(params?.isConfigured ? 
{ isConfigured: params.isConfigured } : {}), @@ -436,6 +437,35 @@ describe("server-channels auto restart", () => { expect(succeedingStart).toHaveBeenCalledTimes(1); }); + it("evicts stale account lifecycle state during whole-channel reload", async () => { + /* mutable so that listAccountIds below reports a different account set on each start */ let accountIds = [DEFAULT_ACCOUNT_ID]; + /* the mocked account start stays pending until its abort signal fires */ const startAccount = vi.fn( + async ({ abortSignal }: { abortSignal: AbortSignal }) => + await new Promise((resolve) => { + abortSignal.addEventListener("abort", () => resolve(), { once: true }); + }), + ); + installTestRegistry(createTestPlugin({ startAccount, listAccountIds: () => accountIds })); + const manager = createManager(); + + await manager.startChannel("discord"); + + /* stop, then start the channel again while no account is listed */ accountIds = []; + await manager.stopChannel("discord"); + await manager.startChannel("discord"); + + /* list the account again and start once more */ accountIds = [DEFAULT_ACCOUNT_ID]; + await manager.startChannel("discord"); + + const snapshot = manager.getRuntimeSnapshot(); + const account = snapshot.channelAccounts.discord?.[DEFAULT_ACCOUNT_ID]; + /* two calls expected: the initial start and the final start; the middle start had an empty account list */ expect(startAccount).toHaveBeenCalledTimes(2); + /* lifecycle fields from before the account was dropped must not carry over */ expect(account?.reconnectAttempts).toBe(0); + expect(account?.lastStopAt).toBeUndefined(); + + await manager.stopChannel("discord"); + }); + it("reuses plugin account resolution for health monitor overrides", () => { installTestRegistry( createTestPlugin({ diff --git a/src/gateway/server-channels.ts b/src/gateway/server-channels.ts index e4e487344c7..765f170267e 100644 --- a/src/gateway/server-channels.ts +++ b/src/gateway/server-channels.ts @@ -282,6 +282,27 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage return channelRuntime ?? 
resolveChannelRuntime?.(); }; + /** Removes the store.runtimes entry, the restartAttempts entry and the manuallyStopped entry for every id in store.runtimes that is not in accountIds. Ids still present in store.aborts, store.starting or store.tasks are skipped and keep their state. */ const evictStaleChannelAccountState = ( + channelId: ChannelId, + store: ChannelRuntimeStore, + accountIds: readonly string[], + ) => { + const activeAccountIds = new Set(accountIds); + for (const id of store.runtimes.keys()) { + if ( + activeAccountIds.has(id) || + store.aborts.has(id) || + store.starting.has(id) || + store.tasks.has(id) + ) { + continue; + } + store.runtimes.delete(id); + restartAttempts.delete(restartKey(channelId, id)); + manuallyStopped.delete(restartKey(channelId, id)); + } + }; + const startChannelInternal = async ( channelId: ChannelId, accountId?: string, @@ -297,6 +318,9 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage resetDirectoryCache({ channel: channelId, accountId }); const store = getStore(channelId); const accountIds = accountId ? [accountId] : plugin.config.listAccountIds(cfg); + /* Only when no accountId was passed, i.e. accountIds came from plugin.config.listAccountIds. Runs before the empty-list return below, so it also applies when nothing is listed. */ if (!accountId) { + evictStaleChannelAccountState(channelId, store, accountIds); + } if (accountIds.length === 0) { return; }