fix(discord): escalate repeated health-monitor restarts

This commit is contained in:
Vincent Koc
2026-04-26 11:09:03 -07:00
committed by GitHub
parent 6b6dcafcee
commit b4cdd55f62
5 changed files with 85 additions and 4 deletions

View File

@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
- Gateway/chat: keep duplicate attachment-backed `chat.send` retries with the same idempotency key on the documented in-flight path so aborts still target the real active run. Fixes #70139. Thanks @Feelw00.
- Plugins: share package entrypoint resolution between install and discovery, reject mismatched `runtimeExtensions`, and cache bundled runtime-dependency manifest reads during scans. Thanks @codex.
- WhatsApp/Web: keep quiet but healthy linked-device sessions connected by basing the watchdog on WhatsApp Web transport activity, while retaining a longer app-silence cap so frame activity cannot mask a stuck session forever. Fixes #70678; carries forward the focused #71466 approach and keeps #63939 as a related configurable-timeout follow-up. Thanks @vincentkoc and @oromeis.
- Discord/gateway: count failed health-monitor restart attempts toward cooldown and hourly caps, and evict stale account lifecycle state during channel reloads so repeated Discord gateway recovery cannot loop on old status. Fixes #38596. (#40413) Thanks @jellyAI-dev and @vashquez.
## 2026.4.26

View File

@@ -437,6 +437,31 @@ describe("channel-health-monitor", () => {
monitor.stop();
});
// Regression test: a restart whose startChannel() rejects must still be counted,
// so the configured cooldown / hourly cap prevents a second attempt.
it("counts failed restart attempts toward cooldown and hourly caps", async () => {
  // Start hook that rejects on every invocation.
  const failingStart = vi.fn(async () => {
    throw new Error("startup failed");
  });
  const manager = createSnapshotManager(
    {
      discord: {
        default: managedStoppedAccount("keeps crashing"),
      },
    },
    { startChannel: failingStart },
  );
  const healthMonitor = startDefaultMonitor(manager, {
    checkIntervalMs: 1_000,
    cooldownCycles: 1,
    maxRestartsPerHour: 1,
  });

  // 5,001 ms of timer time spans several 1,000 ms check intervals.
  await vi.advanceTimersByTimeAsync(5_001);

  // Only the first (failed) attempt may have reached startChannel.
  expect(manager.startChannel).toHaveBeenCalledTimes(1);
  healthMonitor.stop();
});
it("runs checks single-flight when restart work is still in progress", async () => {
let releaseStart: (() => void) | undefined;
const startGate = new Promise<void>((resolve) => {

View File

@@ -157,15 +157,16 @@ export function startChannelHealthMonitor(deps: ChannelHealthMonitorDeps): Chann
log.info?.(`[${channelId}:${accountId}] health-monitor: restarting (reason: ${reason})`);
record.lastRestartAt = now;
record.restartsThisHour.push({ at: now });
restartRecords.set(key, record);
try {
if (status.running) {
await channelManager.stopChannel(channelId as ChannelId, accountId);
}
channelManager.resetRestartAttempts(channelId as ChannelId, accountId);
await channelManager.startChannel(channelId as ChannelId, accountId);
record.lastRestartAt = now;
record.restartsThisHour.push({ at: now });
restartRecords.set(key, record);
} catch (err) {
log.error?.(
`[${channelId}:${accountId}] health-monitor: restart failed: ${String(err)}`,

View File

@@ -50,6 +50,7 @@ function createTestPlugin(params?: {
order?: number;
account?: TestAccount;
startAccount?: NonNullable<ChannelPlugin<TestAccount>["gateway"]>["startAccount"];
listAccountIds?: ChannelPlugin<TestAccount>["config"]["listAccountIds"];
includeDescribeAccount?: boolean;
describeAccount?: ChannelPlugin<TestAccount>["config"]["describeAccount"];
resolveAccount?: ChannelPlugin<TestAccount>["config"]["resolveAccount"];
@@ -59,7 +60,7 @@ function createTestPlugin(params?: {
const account = params?.account ?? { enabled: true, configured: true };
const includeDescribeAccount = params?.includeDescribeAccount !== false;
const config: ChannelPlugin<TestAccount>["config"] = {
listAccountIds: () => [DEFAULT_ACCOUNT_ID],
listAccountIds: params?.listAccountIds ?? (() => [DEFAULT_ACCOUNT_ID]),
resolveAccount: params?.resolveAccount ?? (() => account),
isEnabled: (resolved) => resolved.enabled !== false,
...(params?.isConfigured ? { isConfigured: params.isConfigured } : {}),
@@ -436,6 +437,35 @@ describe("server-channels auto restart", () => {
expect(succeedingStart).toHaveBeenCalledTimes(1);
});
// Reloads the whole channel while the listed account set changes
// (present -> empty -> present) and checks that the re-added account's
// snapshot shows no reconnect attempts and no lastStopAt.
it("evicts stale account lifecycle state during whole-channel reload", async () => {
  // Reassigned between reloads; listAccountIds below reads the current value.
  let listedIds = [DEFAULT_ACCOUNT_ID];
  // The returned promise settles only once the supplied abort signal fires.
  const startAccount = vi.fn(
    ({ abortSignal }: { abortSignal: AbortSignal }) =>
      new Promise<void>((resolve) => {
        abortSignal.addEventListener("abort", () => resolve(), { once: true });
      }),
  );
  const plugin = createTestPlugin({ startAccount, listAccountIds: () => listedIds });
  installTestRegistry(plugin);
  const manager = createManager();

  // First start with the default account listed.
  await manager.startChannel("discord");

  // Whole-channel reload while no accounts are listed.
  listedIds = [];
  await manager.stopChannel("discord");
  await manager.startChannel("discord");

  // List the account again and reload once more.
  listedIds = [DEFAULT_ACCOUNT_ID];
  await manager.startChannel("discord");

  const discordAccounts = manager.getRuntimeSnapshot().channelAccounts.discord;
  const account = discordAccounts?.[DEFAULT_ACCOUNT_ID];
  expect(startAccount).toHaveBeenCalledTimes(2);
  expect(account?.reconnectAttempts).toBe(0);
  expect(account?.lastStopAt).toBeUndefined();

  await manager.stopChannel("discord");
});
it("reuses plugin account resolution for health monitor overrides", () => {
installTestRegistry(
createTestPlugin({

View File

@@ -282,6 +282,27 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
return channelRuntime ?? resolveChannelRuntime?.();
};
/**
 * Removes per-account state for accounts that are no longer listed for a channel.
 *
 * An account id found in `store.runtimes` is kept when it appears in `accountIds`
 * or still has an entry in `store.aborts`, `store.starting`, or `store.tasks`.
 * Every other id is deleted from `store.runtimes`, and its restart key is removed
 * from `restartAttempts` and `manuallyStopped`.
 *
 * @param channelId - Channel whose account state is being pruned.
 * @param store - Runtime store for that channel; its `runtimes` map is mutated.
 * @param accountIds - Account ids that should be retained.
 */
const evictStaleChannelAccountState = (
  channelId: ChannelId,
  store: ChannelRuntimeStore,
  accountIds: readonly string[],
) => {
  const retained = new Set(accountIds);
  // Collect the stale ids first, then delete them in a second pass.
  const staleIds = [...store.runtimes.keys()].filter(
    (id) =>
      !retained.has(id) &&
      !store.aborts.has(id) &&
      !store.starting.has(id) &&
      !store.tasks.has(id),
  );
  for (const id of staleIds) {
    const key = restartKey(channelId, id);
    store.runtimes.delete(id);
    restartAttempts.delete(key);
    manuallyStopped.delete(key);
  }
};
const startChannelInternal = async (
channelId: ChannelId,
accountId?: string,
@@ -297,6 +318,9 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
resetDirectoryCache({ channel: channelId, accountId });
const store = getStore(channelId);
const accountIds = accountId ? [accountId] : plugin.config.listAccountIds(cfg);
if (!accountId) {
evictStaleChannelAccountState(channelId, store, accountIds);
}
if (accountIds.length === 0) {
return;
}