mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 15:30:39 +00:00
fix(gateway): skip stale-socket restarts for Telegram polling (openclaw#38405)
Verified: pnpm build; pnpm check; pnpm test:macmini. Co-authored-by: ql-wade <262266039+ql-wade@users.noreply.github.com>
This commit is contained in:
@@ -226,6 +226,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Feishu/reply delivery reliability: disable block streaming in Feishu reply options so plain-text auto-render replies are no longer silently dropped before final delivery. (#38258) Thanks @xinhuagu.
|
||||
- Agents/reply MEDIA delivery: normalize local assistant `MEDIA:` paths before block/final delivery, keep media dedupe aligned with message-tool sends, and contain malformed media normalization failures so generated files send reliably instead of falling back to empty responses. (#38572) Thanks @obviyus.
|
||||
- Sessions/bootstrap cache rollover invalidation: clear cached workspace bootstrap snapshots whenever an existing `sessionKey` rolls to a new `sessionId` across auto-reply, command, and isolated cron session resolvers, so `AGENTS.md`/`MEMORY.md`/`USER.md` updates are reloaded after daily, idle, or forced session resets instead of staying stale until gateway restart. (#38494) Thanks @LivingInDrm.
|
||||
- Gateway/Telegram polling health monitor: skip stale-socket restarts for Telegram long-polling channels and thread channel identity through shared health evaluation so polling connections are not restarted on the WebSocket stale-socket heuristic. (#38395) Thanks @ql-wade and @Takhoffman.
|
||||
|
||||
## 2026.3.2
|
||||
|
||||
|
||||
@@ -122,6 +122,7 @@ export function startChannelHealthMonitor(deps: ChannelHealthMonitorDeps): Chann
|
||||
continue;
|
||||
}
|
||||
const healthPolicy: ChannelHealthPolicy = {
|
||||
channelId,
|
||||
now,
|
||||
staleEventThresholdMs: timing.staleEventThresholdMs,
|
||||
channelConnectGraceMs: timing.channelConnectGraceMs,
|
||||
|
||||
@@ -10,6 +10,7 @@ describe("evaluateChannelHealth", () => {
|
||||
configured: true,
|
||||
},
|
||||
{
|
||||
channelId: "discord",
|
||||
now: 100_000,
|
||||
channelConnectGraceMs: 10_000,
|
||||
staleEventThresholdMs: 30_000,
|
||||
@@ -28,6 +29,7 @@ describe("evaluateChannelHealth", () => {
|
||||
lastStartAt: 95_000,
|
||||
},
|
||||
{
|
||||
channelId: "discord",
|
||||
now: 100_000,
|
||||
channelConnectGraceMs: 10_000,
|
||||
staleEventThresholdMs: 30_000,
|
||||
@@ -48,6 +50,7 @@ describe("evaluateChannelHealth", () => {
|
||||
lastRunActivityAt: now - 30_000,
|
||||
},
|
||||
{
|
||||
channelId: "discord",
|
||||
now,
|
||||
channelConnectGraceMs: 10_000,
|
||||
staleEventThresholdMs: 30_000,
|
||||
@@ -68,6 +71,7 @@ describe("evaluateChannelHealth", () => {
|
||||
lastRunActivityAt: now - 26 * 60_000,
|
||||
},
|
||||
{
|
||||
channelId: "discord",
|
||||
now,
|
||||
channelConnectGraceMs: 10_000,
|
||||
staleEventThresholdMs: 30_000,
|
||||
@@ -90,6 +94,7 @@ describe("evaluateChannelHealth", () => {
|
||||
lastRunActivityAt: now - 31_000,
|
||||
},
|
||||
{
|
||||
channelId: "discord",
|
||||
now,
|
||||
channelConnectGraceMs: 10_000,
|
||||
staleEventThresholdMs: 30_000,
|
||||
@@ -109,6 +114,7 @@ describe("evaluateChannelHealth", () => {
|
||||
lastEventAt: null,
|
||||
},
|
||||
{
|
||||
channelId: "discord",
|
||||
now: 100_000,
|
||||
channelConnectGraceMs: 10_000,
|
||||
staleEventThresholdMs: 30_000,
|
||||
@@ -116,6 +122,26 @@ describe("evaluateChannelHealth", () => {
|
||||
);
|
||||
expect(evaluation).toEqual({ healthy: false, reason: "stale-socket" });
|
||||
});
|
||||
|
||||
it("skips stale-socket detection for telegram long-polling channels", () => {
|
||||
const evaluation = evaluateChannelHealth(
|
||||
{
|
||||
running: true,
|
||||
connected: true,
|
||||
enabled: true,
|
||||
configured: true,
|
||||
lastStartAt: 0,
|
||||
lastEventAt: null,
|
||||
},
|
||||
{
|
||||
channelId: "telegram",
|
||||
now: 100_000,
|
||||
channelConnectGraceMs: 10_000,
|
||||
staleEventThresholdMs: 30_000,
|
||||
},
|
||||
);
|
||||
expect(evaluation).toEqual({ healthy: true, reason: "healthy" });
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveChannelRestartReason", () => {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import type { ChannelId } from "../channels/plugins/types.js";
|
||||
|
||||
export type ChannelHealthSnapshot = {
|
||||
running?: boolean;
|
||||
connected?: boolean;
|
||||
@@ -28,6 +30,7 @@ export type ChannelHealthEvaluation = {
|
||||
};
|
||||
|
||||
export type ChannelHealthPolicy = {
|
||||
channelId: ChannelId;
|
||||
now: number;
|
||||
staleEventThresholdMs: number;
|
||||
channelConnectGraceMs: number;
|
||||
@@ -97,14 +100,19 @@ export function evaluateChannelHealth(
|
||||
if (snapshot.connected === false) {
|
||||
return { healthy: false, reason: "disconnected" };
|
||||
}
|
||||
if (snapshot.lastEventAt != null || snapshot.lastStartAt != null) {
|
||||
const upSince = snapshot.lastStartAt ?? 0;
|
||||
const upDuration = policy.now - upSince;
|
||||
if (upDuration > policy.staleEventThresholdMs) {
|
||||
const lastEvent = snapshot.lastEventAt ?? 0;
|
||||
const eventAge = policy.now - lastEvent;
|
||||
if (eventAge > policy.staleEventThresholdMs) {
|
||||
return { healthy: false, reason: "stale-socket" };
|
||||
// Skip stale-socket check for Telegram (long-polling mode). Each polling request
|
||||
// acts as a heartbeat, so the half-dead WebSocket scenario this check is designed
|
||||
// to catch does not apply to Telegram's long-polling architecture.
|
||||
if (policy.channelId !== "telegram") {
|
||||
if (snapshot.lastEventAt != null || snapshot.lastStartAt != null) {
|
||||
const upSince = snapshot.lastStartAt ?? 0;
|
||||
const upDuration = policy.now - upSince;
|
||||
if (upDuration > policy.staleEventThresholdMs) {
|
||||
const lastEvent = snapshot.lastEventAt ?? 0;
|
||||
const eventAge = policy.now - lastEvent;
|
||||
if (eventAge > policy.staleEventThresholdMs) {
|
||||
return { healthy: false, reason: "stale-socket" };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -167,6 +167,28 @@ describe("createReadinessChecker", () => {
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it("keeps telegram long-polling channels ready without stale-socket classification", () => {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(new Date("2026-03-06T12:00:00Z"));
|
||||
const startedAt = Date.now() - 31 * 60_000;
|
||||
const manager = createManager(
|
||||
snapshotWith({
|
||||
telegram: {
|
||||
running: true,
|
||||
connected: true,
|
||||
enabled: true,
|
||||
configured: true,
|
||||
lastStartAt: startedAt,
|
||||
lastEventAt: null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const readiness = createReadinessChecker({ channelManager: manager, startedAt });
|
||||
expect(readiness()).toEqual({ ready: true, failing: [], uptimeMs: 1_860_000 });
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it("caches readiness snapshots briefly to keep repeated probes cheap", () => {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(new Date("2026-03-06T12:00:00Z"));
|
||||
|
||||
@@ -50,11 +50,6 @@ export function createReadinessChecker(deps: {
|
||||
|
||||
const snapshot = channelManager.getRuntimeSnapshot();
|
||||
const failing: string[] = [];
|
||||
const policy: ChannelHealthPolicy = {
|
||||
now,
|
||||
staleEventThresholdMs: DEFAULT_CHANNEL_STALE_EVENT_THRESHOLD_MS,
|
||||
channelConnectGraceMs: DEFAULT_CHANNEL_CONNECT_GRACE_MS,
|
||||
};
|
||||
|
||||
for (const [channelId, accounts] of Object.entries(snapshot.channelAccounts)) {
|
||||
if (!accounts) {
|
||||
@@ -64,6 +59,12 @@ export function createReadinessChecker(deps: {
|
||||
if (!accountSnapshot) {
|
||||
continue;
|
||||
}
|
||||
const policy: ChannelHealthPolicy = {
|
||||
now,
|
||||
staleEventThresholdMs: DEFAULT_CHANNEL_STALE_EVENT_THRESHOLD_MS,
|
||||
channelConnectGraceMs: DEFAULT_CHANNEL_CONNECT_GRACE_MS,
|
||||
channelId,
|
||||
};
|
||||
const health = evaluateChannelHealth(accountSnapshot, policy);
|
||||
if (!health.healthy && !shouldIgnoreReadinessFailure(accountSnapshot, health)) {
|
||||
failing.push(channelId);
|
||||
|
||||
Reference in New Issue
Block a user