mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:00:42 +00:00
fix(discord): escalate repeated health-monitor restarts
This commit is contained in:
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/chat: keep duplicate attachment-backed `chat.send` retries with the same idempotency key on the documented in-flight path so aborts still target the real active run. Fixes #70139. Thanks @Feelw00.
|
||||
- Plugins: share package entrypoint resolution between install and discovery, reject mismatched `runtimeExtensions`, and cache bundled runtime-dependency manifest reads during scans. Thanks @codex.
|
||||
- WhatsApp/Web: keep quiet but healthy linked-device sessions connected by basing the watchdog on WhatsApp Web transport activity, while retaining a longer app-silence cap so frame activity cannot mask a stuck session forever. Fixes #70678; carries forward the focused #71466 approach and keeps #63939 as a related configurable-timeout follow-up. Thanks @vincentkoc and @oromeis.
|
||||
- Discord/gateway: count failed health-monitor restart attempts toward cooldown and hourly caps, and evict stale account lifecycle state during channel reloads so repeated Discord gateway recovery cannot loop on old status. Fixes #38596. (#40413) Thanks @jellyAI-dev and @vashquez.
|
||||
|
||||
## 2026.4.26
|
||||
|
||||
|
||||
@@ -437,6 +437,31 @@ describe("channel-health-monitor", () => {
|
||||
monitor.stop();
|
||||
});
|
||||
|
||||
// Regression test: a restart attempt whose startChannel call throws is still expected to
// count against the monitor's cooldown and hourly restart cap, so the monitor does not
// retry on every check.
it("counts failed restart attempts toward cooldown and hourly caps", async () => {
|
||||
// First argument: a single discord "default" account built by managedStoppedAccount
// (presumably a managed account reported as stopped; helper is defined outside this hunk).
// Second argument: supplies a startChannel mock that always throws.
const manager = createSnapshotManager(
|
||||
{
|
||||
discord: {
|
||||
default: managedStoppedAccount("keeps crashing"),
|
||||
},
|
||||
},
|
||||
{
|
||||
startChannel: vi.fn(async () => {
|
||||
throw new Error("startup failed");
|
||||
}),
|
||||
},
|
||||
);
|
||||
// 1_000 ms check interval with a one-cycle cooldown and a cap of one restart per hour.
const monitor = startDefaultMonitor(manager, {
|
||||
checkIntervalMs: 1_000,
|
||||
cooldownCycles: 1,
|
||||
maxRestartsPerHour: 1,
|
||||
});
|
||||
|
||||
// Advance the timers just past five 1_000 ms check intervals.
await vi.advanceTimersByTimeAsync(5_001);
|
||||
|
||||
// Only one start attempt is expected over that window even though the attempt failed.
expect(manager.startChannel).toHaveBeenCalledTimes(1);
|
||||
monitor.stop();
|
||||
});
|
||||
|
||||
it("runs checks single-flight when restart work is still in progress", async () => {
|
||||
let releaseStart: (() => void) | undefined;
|
||||
const startGate = new Promise<void>((resolve) => {
|
||||
|
||||
@@ -157,15 +157,16 @@ export function startChannelHealthMonitor(deps: ChannelHealthMonitorDeps): Chann
|
||||
|
||||
log.info?.(`[${channelId}:${accountId}] health-monitor: restarting (reason: ${reason})`);
|
||||
|
||||
record.lastRestartAt = now;
|
||||
record.restartsThisHour.push({ at: now });
|
||||
restartRecords.set(key, record);
|
||||
|
||||
try {
|
||||
if (status.running) {
|
||||
await channelManager.stopChannel(channelId as ChannelId, accountId);
|
||||
}
|
||||
channelManager.resetRestartAttempts(channelId as ChannelId, accountId);
|
||||
await channelManager.startChannel(channelId as ChannelId, accountId);
|
||||
record.lastRestartAt = now;
|
||||
record.restartsThisHour.push({ at: now });
|
||||
restartRecords.set(key, record);
|
||||
} catch (err) {
|
||||
log.error?.(
|
||||
`[${channelId}:${accountId}] health-monitor: restart failed: ${String(err)}`,
|
||||
|
||||
@@ -50,6 +50,7 @@ function createTestPlugin(params?: {
|
||||
order?: number;
|
||||
account?: TestAccount;
|
||||
startAccount?: NonNullable<ChannelPlugin<TestAccount>["gateway"]>["startAccount"];
|
||||
listAccountIds?: ChannelPlugin<TestAccount>["config"]["listAccountIds"];
|
||||
includeDescribeAccount?: boolean;
|
||||
describeAccount?: ChannelPlugin<TestAccount>["config"]["describeAccount"];
|
||||
resolveAccount?: ChannelPlugin<TestAccount>["config"]["resolveAccount"];
|
||||
@@ -59,7 +60,7 @@ function createTestPlugin(params?: {
|
||||
const account = params?.account ?? { enabled: true, configured: true };
|
||||
const includeDescribeAccount = params?.includeDescribeAccount !== false;
|
||||
const config: ChannelPlugin<TestAccount>["config"] = {
|
||||
listAccountIds: () => [DEFAULT_ACCOUNT_ID],
|
||||
listAccountIds: params?.listAccountIds ?? (() => [DEFAULT_ACCOUNT_ID]),
|
||||
resolveAccount: params?.resolveAccount ?? (() => account),
|
||||
isEnabled: (resolved) => resolved.enabled !== false,
|
||||
...(params?.isConfigured ? { isConfigured: params.isConfigured } : {}),
|
||||
@@ -436,6 +437,35 @@ describe("server-channels auto restart", () => {
|
||||
expect(succeedingStart).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// Regression test: after an account disappears from listAccountIds and later returns,
// the runtime snapshot for that account is expected to show fresh lifecycle state
// (reconnectAttempts of 0 and no lastStopAt).
it("evicts stale account lifecycle state during whole-channel reload", async () => {
|
||||
// Mutable list returned by the plugin's listAccountIds; reassigned below to simulate
// the account being removed and then re-added.
let accountIds = [DEFAULT_ACCOUNT_ID];
|
||||
// startAccount mock whose promise stays pending until the supplied abortSignal fires "abort".
const startAccount = vi.fn(
|
||||
async ({ abortSignal }: { abortSignal: AbortSignal }) =>
|
||||
await new Promise<void>((resolve) => {
|
||||
abortSignal.addEventListener("abort", () => resolve(), { once: true });
|
||||
}),
|
||||
);
|
||||
installTestRegistry(createTestPlugin({ startAccount, listAccountIds: () => accountIds }));
|
||||
const manager = createManager();
|
||||
|
||||
// Initial whole-channel start (no accountId argument) with the default account listed.
await manager.startChannel("discord");
|
||||
|
||||
// Remove the account, stop the channel, then start again while no accounts are listed.
accountIds = [];
|
||||
await manager.stopChannel("discord");
|
||||
await manager.startChannel("discord");
|
||||
|
||||
// Re-add the account and start the channel once more.
accountIds = [DEFAULT_ACCOUNT_ID];
|
||||
await manager.startChannel("discord");
|
||||
|
||||
const snapshot = manager.getRuntimeSnapshot();
|
||||
const account = snapshot.channelAccounts.discord?.[DEFAULT_ACCOUNT_ID];
|
||||
// Two startAccount calls in total: the initial start and the start after re-adding the account.
expect(startAccount).toHaveBeenCalledTimes(2);
|
||||
expect(account?.reconnectAttempts).toBe(0);
|
||||
expect(account?.lastStopAt).toBeUndefined();
|
||||
|
||||
// Final stop so the pending startAccount promise can settle via its abort signal
// (assumes stopChannel aborts the signal passed to startAccount; not shown in this hunk).
await manager.stopChannel("discord");
|
||||
});
|
||||
|
||||
it("reuses plugin account resolution for health monitor overrides", () => {
|
||||
installTestRegistry(
|
||||
createTestPlugin({
|
||||
|
||||
@@ -282,6 +282,27 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
||||
return channelRuntime ?? resolveChannelRuntime?.();
|
||||
};
|
||||
|
||||
/**
 * Removes per-account state for accounts of `channelId` that are not in `accountIds`.
 *
 * For every id in `store.runtimes` that is neither listed in `accountIds` nor present in
 * `store.aborts`, `store.starting` or `store.tasks`, this deletes the `store.runtimes` entry
 * and the matching `restartAttempts` and `manuallyStopped` entries keyed by
 * `restartKey(channelId, id)`.
 *
 * @param channelId - Channel whose account state is being pruned; used to build restart keys.
 * @param store - Runtime store for that channel; its `runtimes` collection is mutated in place.
 * @param accountIds - Account ids that must be kept.
 */
const evictStaleChannelAccountState = (
|
||||
channelId: ChannelId,
|
||||
store: ChannelRuntimeStore,
|
||||
accountIds: readonly string[],
|
||||
) => {
|
||||
// Set gives constant-time membership checks inside the loop below.
const activeAccountIds = new Set(accountIds);
|
||||
for (const id of store.runtimes.keys()) {
|
||||
// Keep ids that are still listed, and ids that still appear in aborts/starting/tasks
// (presumably in-flight lifecycle work for that account; confirm against the store definition).
if (
|
||||
activeAccountIds.has(id) ||
|
||||
store.aborts.has(id) ||
|
||||
store.starting.has(id) ||
|
||||
store.tasks.has(id)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
// Drop the runtime entry together with its restart counter and manual-stop marker.
store.runtimes.delete(id);
|
||||
restartAttempts.delete(restartKey(channelId, id));
|
||||
manuallyStopped.delete(restartKey(channelId, id));
|
||||
}
|
||||
};
|
||||
|
||||
const startChannelInternal = async (
|
||||
channelId: ChannelId,
|
||||
accountId?: string,
|
||||
@@ -297,6 +318,9 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
||||
resetDirectoryCache({ channel: channelId, accountId });
|
||||
const store = getStore(channelId);
|
||||
const accountIds = accountId ? [accountId] : plugin.config.listAccountIds(cfg);
|
||||
if (!accountId) {
|
||||
evictStaleChannelAccountState(channelId, store, accountIds);
|
||||
}
|
||||
if (accountIds.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user