fix(telegram): recover polling after prolonged network outages

When grammY's runner exceeds maxRetryTime during a network outage,
runner.task() resolves cleanly. Previously, the polling loop treated
this as an intentional stop and exited permanently — killing Telegram
polling for the lifetime of the gateway process.

Now the outer loop detects this case and restarts the runner with
exponential backoff (unless the abort signal has fired), so polling
recovers once connectivity is restored.

Also bumps maxRetryTime from 5 minutes to 60 minutes so the runner
itself survives longer outages (e.g. scheduled internet downtime)
without needing the outer loop restart path.
This commit is contained in:
sten moocow
2026-02-25 06:58:58 -05:00
committed by Peter Steinberger
parent ce8c67c314
commit 95c6b3a912

View File

@@ -45,8 +45,10 @@ export function createTelegramRunnerOptions(cfg: OpenClawConfig): RunOptions<unk
},
// Suppress grammY getUpdates stack traces; we log concise errors ourselves.
silent: true,
// Retry transient failures for a limited window before surfacing errors.
maxRetryTime: 5 * 60 * 1000,
// Retry transient failures before surfacing errors. Use a generous
// window so the runner survives prolonged outages (e.g. scheduled
// internet downtime) without the outer loop needing to restart it.
maxRetryTime: 60 * 60 * 1000,
retryInterval: "exponential",
},
};
@@ -277,14 +279,21 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
try {
// runner.task() returns a promise that resolves when the runner stops
await runner.task();
if (!forceRestarted) {
if (opts.abortSignal?.aborted) {
return;
}
forceRestarted = false;
// The runner stopped on its own. This can happen when grammY's
// maxRetryTime is exceeded (e.g. prolonged network outage).
// Instead of exiting permanently, restart with backoff so polling
// recovers once connectivity is restored.
restartAttempts += 1;
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
const reason = forceRestarted
? "unhandled network error"
: "runner stopped (maxRetryTime exceeded or graceful stop)";
forceRestarted = false;
log(
`Telegram polling runner restarted after unhandled network error; retrying in ${formatDurationPrecise(delayMs)}.`,
`Telegram polling runner stopped (${reason}); restarting in ${formatDurationPrecise(delayMs)}.`,
);
await sleepWithAbort(delayMs, opts.abortSignal);
continue;