fix(whatsapp): downgrade recovered watchdog disconnects (#77026)

This commit is contained in:
Rubén Cuevas
2026-05-10 22:19:03 -04:00
committed by GitHub
parent f820a9892a
commit 35a85bcfb5
6 changed files with 115 additions and 5 deletions

View File

@@ -179,6 +179,7 @@ Docs: https://docs.openclaw.ai
- iMessage: emit a WARN log when an action is blocked because the imsg private API bridge is not attached, so operators see the silent-drop in `~/.openclaw/logs/openclaw.log` instead of having to read per-session trajectory JSONL `tool.result` payloads. This is common after a gateway restart un-injects the dylib from Messages.app. (#80035) Thanks @omarshahine.
- Codex: cross-fill missing `thread.id` and `thread.sessionId` before schema validation so live Codex app-server responses that omit `sessionId` no longer fail `thread/start` or `thread/resume`. Fixes #80124. (#80137) Thanks @kagura-agent.
- Agents/Pi: wait for embedded abort cleanup to settle before releasing the session write lock, preventing follow-up turns from racing previous prompt teardown. (#80239) Thanks @samzong.
- WhatsApp: downgrade OpenClaw watchdog-triggered Web reconnects from runtime errors to recovery warnings and clear the recovered reconnect status after the next healthy connection. (#77026) Thanks @rubencu.
## 2026.5.9

View File

@@ -40,6 +40,7 @@ function requireOnMessage(
async function startWatchdogScenario(params: {
monitorWebChannel: typeof import("./auto-reply/monitor.js").monitorWebChannel;
statusSink?: Parameters<typeof startWebAutoReplyMonitor>[0]["statusSink"];
}) {
const sleep = vi.fn(async () => {});
const scripted = createScriptedWebListenerFactory();
@@ -50,6 +51,7 @@ async function startWatchdogScenario(params: {
heartbeatSeconds: 60,
messageTimeoutMs: 30,
watchdogCheckMs: 5,
statusSink: params.statusSink,
});
await vi.waitFor(
@@ -351,8 +353,10 @@ describe("web auto-reply connection", () => {
it("forces reconnect when watchdog closes without onClose", async () => {
vi.useFakeTimers();
try {
const { scripted, controller, run } = await startWatchdogScenario({
const statuses: Array<Record<string, unknown>> = [];
const { scripted, controller, run, runtime } = await startWatchdogScenario({
monitorWebChannel,
statusSink: (status) => statuses.push({ ...status }),
});
await vi.advanceTimersByTimeAsync(200);
@@ -368,6 +372,38 @@ describe("web auto-reply connection", () => {
scripted.resolveClose(1, { status: 499, isLoggedOut: false });
await Promise.resolve();
await run;
expect(runtime.log).toHaveBeenCalledWith(
expect.stringContaining("WhatsApp Web watchdog is recovering a stale connection"),
);
expect(runtime.error).not.toHaveBeenCalledWith(expect.stringContaining("status 499"));
expect(
statuses.some(
(status) =>
status.healthState === "reconnecting" &&
status.reconnectAttempts === 1 &&
(status.lastDisconnect as { status?: number } | null)?.status === 499,
),
).toBe(true);
expect(
statuses.every(
(status) =>
!(
status.lastDisconnect &&
typeof status.lastDisconnect === "object" &&
"expected" in status.lastDisconnect
),
),
).toBe(true);
expect(
statuses.some(
(status) =>
status.connected === true &&
status.healthState === "healthy" &&
status.reconnectAttempts === 0 &&
status.lastDisconnect === null,
),
).toBe(true);
} finally {
vi.useRealTimers();
}

View File

@@ -60,4 +60,58 @@ describe("createWebChannelStatusController", () => {
expect(last.connected).toBe(true);
expect(last.lastTransportActivityAt).toBe(1000);
});
it("clears watchdog recovery history once the socket is healthy again", () => {
  // Record a shallow copy of every status snapshot the controller emits.
  const emitted: Record<string, unknown>[] = [];
  const statusController = createWebChannelStatusController((snapshot) => {
    emitted.push({ ...snapshot });
  });
  const latest = () => emitted.at(-1)!;

  statusController.noteConnected(1000);

  // Close flagged as a watchdog recovery.
  statusController.noteClose({
    at: 2000,
    statusCode: 499,
    error: "status=499",
    reconnectAttempts: 1,
    healthState: "reconnecting",
    watchdogRecovery: true,
  });

  // While reconnecting, the disconnect is expected to be reported with exactly
  // these fields.
  const expectedDisconnect = {
    at: 2000,
    status: 499,
    error: "status=499",
    loggedOut: false,
  };
  expect(latest().lastDisconnect).toEqual(expectedDisconnect);

  // Once the socket is connected again, the watchdog disconnect and its retry
  // count are expected to be wiped from the reported status.
  statusController.noteConnected(3000);
  const recovered = latest();
  expect(recovered.connected).toBe(true);
  expect(recovered.healthState).toBe("healthy");
  expect(recovered.reconnectAttempts).toBe(0);
  expect(recovered.lastDisconnect).toBeNull();
});
it("keeps non-watchdog reconnect history after the socket reconnects", () => {
  // Record a shallow copy of every status snapshot the controller emits.
  const emitted: Record<string, unknown>[] = [];
  const statusController = createWebChannelStatusController((snapshot) => {
    emitted.push({ ...snapshot });
  });

  statusController.noteConnected(1000);

  // Close without the watchdogRecovery flag.
  statusController.noteClose({
    at: 2000,
    statusCode: 408,
    error: "status=408",
    reconnectAttempts: 1,
    healthState: "reconnecting",
  });
  statusController.noteConnected(3000);

  // After reconnecting, the status is healthy but the earlier disconnect and
  // its retry count are expected to remain visible.
  const expectedDisconnect = {
    at: 2000,
    status: 408,
    error: "status=408",
    loggedOut: false,
  };
  const reconnected = emitted.at(-1)!;
  expect(reconnected.connected).toBe(true);
  expect(reconnected.healthState).toBe("healthy");
  expect(reconnected.reconnectAttempts).toBe(1);
  expect(reconnected.lastDisconnect).toEqual(expectedDisconnect);
});
});

View File

@@ -16,6 +16,7 @@ function isTerminalHealthState(healthState: WebChannelHealthState | undefined):
}
export function createWebChannelStatusController(statusSink?: (status: WebChannelStatus) => void) {
let lastDisconnectWasWatchdogRecovery = false;
const status: WebChannelStatus = {
running: true,
connected: false,
@@ -39,6 +40,11 @@ export function createWebChannelStatusController(statusSink?: (status: WebChanne
noteConnected(at = Date.now()) {
Object.assign(status, createConnectedChannelStatusPatch(at));
Object.assign(status, createTransportActivityStatusPatch(at));
if (lastDisconnectWasWatchdogRecovery) {
status.lastDisconnect = null;
status.reconnectAttempts = 0;
lastDisconnectWasWatchdogRecovery = false;
}
status.lastError = null;
status.healthState = "healthy";
emit();
@@ -78,8 +84,10 @@ export function createWebChannelStatusController(statusSink?: (status: WebChanne
error?: string;
reconnectAttempts: number;
healthState: WebChannelHealthState;
watchdogRecovery?: boolean;
}) {
const at = params.at ?? Date.now();
lastDisconnectWasWatchdogRecovery = params.watchdogRecovery === true;
status.connected = false;
status.lastEventAt = at;
status.lastDisconnect = {

View File

@@ -11,6 +11,7 @@ import { getChildLogger } from "openclaw/plugin-sdk/runtime-env";
import {
defaultRuntime,
formatDurationPrecise,
warn,
type RuntimeEnv,
} from "openclaw/plugin-sdk/runtime-env";
import { enqueueSystemEvent } from "openclaw/plugin-sdk/system-event-runtime";
@@ -18,6 +19,7 @@ import { resolveWhatsAppAccount, resolveWhatsAppMediaMaxBytes } from "../account
import { WHATSAPP_AUTH_UNSTABLE_CODE, WhatsAppAuthUnstableError } from "../auth-store.js";
import {
WhatsAppConnectionController,
WHATSAPP_WATCHDOG_TIMEOUT_ERROR,
type ManagedWhatsAppListener,
} from "../connection-controller.js";
import { attachWebInboxToSocket, type WhatsAppGroupMetadataCache } from "../inbound/monitor.js";
@@ -566,11 +568,14 @@ export async function monitorWebChannel(
break;
}
const isWatchdogRecoveryReconnect =
decision.normalized.error === WHATSAPP_WATCHDOG_TIMEOUT_ERROR;
statusController.noteClose({
statusCode: decision.normalized.statusCode,
error: decision.normalized.errorText,
reconnectAttempts: decision.reconnectAttempts,
healthState: decision.healthState,
watchdogRecovery: isWatchdogRecoveryReconnect,
});
reconnectLogger.info(
{
@@ -582,9 +587,14 @@ export async function monitorWebChannel(
},
"web reconnect: scheduling retry",
);
runtime.error(
`WhatsApp Web connection closed (status ${decision.normalized.statusLabel}). Retry ${decision.reconnectAttempts}/${reconnectPolicy.maxAttempts || "∞"} in ${formatDurationPrecise(decision.delayMs ?? 0)}… (${decision.normalized.errorText})`,
);
const reconnectMessage = isWatchdogRecoveryReconnect
? `WhatsApp Web watchdog is recovering a stale connection (status ${decision.normalized.statusLabel}). Retry ${decision.reconnectAttempts}/${reconnectPolicy.maxAttempts || "∞"} in ${formatDurationPrecise(decision.delayMs ?? 0)}.`
: `WhatsApp Web connection closed (status ${decision.normalized.statusLabel}). Retry ${decision.reconnectAttempts}/${reconnectPolicy.maxAttempts || "∞"} in ${formatDurationPrecise(decision.delayMs ?? 0)}… (${decision.normalized.errorText})`;
if (isWatchdogRecoveryReconnect) {
runtime.log(warn(reconnectMessage));
} else {
runtime.error(reconnectMessage);
}
await controller.closeCurrentConnection();
try {
await controller.waitBeforeRetry(decision.delayMs ?? 0);

View File

@@ -23,6 +23,7 @@ const WHATSAPP_LOGGED_OUT_RELINK_MESSAGE =
"WhatsApp reported the session is logged out. Cleared cached web session; please rerun openclaw channels login and scan the QR again.";
export const WHATSAPP_LOGGED_OUT_QR_MESSAGE =
"WhatsApp reported the session is logged out. Cleared cached web session; please scan a new QR.";
/**
 * Error value the connection controller attaches to the forced close
 * (status 499, not logged out) issued from its watchdog timer. The web channel
 * monitor compares a close decision's normalized error against this value to
 * recognize a watchdog recovery reconnect.
 */
export const WHATSAPP_WATCHDOG_TIMEOUT_ERROR = "watchdog-timeout";
type TimerHandle = ReturnType<typeof setInterval>;
type WaSocket = Awaited<ReturnType<typeof createWaSocket>>;
@@ -641,7 +642,7 @@ export class WhatsAppConnectionController {
this.forceClose({
status: 499,
isLoggedOut: false,
error: "watchdog-timeout",
error: WHATSAPP_WATCHDOG_TIMEOUT_ERROR,
});
}, this.watchdogCheckMs);
}