From 7e41913a203a737dc26b76a5dd2b1512c6726021 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 23:32:11 +0100 Subject: [PATCH] fix(gateway): reduce TUI history startup latency --- CHANGELOG.md | 2 + docs/channels/troubleshooting.md | 12 ++-- extensions/whatsapp/src/status-issues.test.ts | 50 ++++++++++++++ extensions/whatsapp/src/status-issues.ts | 46 ++++++++++++- src/gateway/server-methods/chat.ts | 3 - .../server.chat.gateway-server-chat-b.test.ts | 68 +++++++++++++++++++ 6 files changed, 171 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40b86523d2d..32a84af74f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ Docs: https://docs.openclaw.ai - Tasks: keep terminal mirrored TaskFlow timestamps pinned to task completion time and let maintenance repair stale mirrors, so ACP terminal delivery updates no longer leave inconsistent flow audits. Refs #73609. Thanks @joerod26. - Gateway/sessions: add conservative stuck-session recovery that releases only stale session lanes while active embedded runs, reply operations, and lane tasks remain serialized, so queued follow-ups can drain without aborting legitimate long-running turns. Refs #73581, #73655, #73652, #73705, #73647, #73602, #73592, and #73601. Thanks @WS-Q0758, @bryangauvin, @spenceryang1996-dot, @bmilne1981, @mattmcintyre, @Vksh07, and @Spolen23. - Plugins: cache unchanged plugin manifest loads by file signature, reducing repeated JSON/JSON5 parsing and manifest normalization in bursty startup and runtime registry paths. Refs #73532 and #73647; carries forward #73678. Thanks @TheDutchRuler. +- CLI/TUI: keep `chat.history` off model-catalog discovery so initial Gateway-backed TUI history loads cannot block behind slow provider/plugin model scans on low-core hosts. Refs #73524. Thanks @harshcatsystems-collab. +- Channels/WhatsApp: flag recently reconnected linked accounts in channel status even when the socket is currently healthy, so flapping WhatsApp Web sessions no longer look clean after a brief reconnect. Refs #73602. Thanks @Vksh07. - Agents/model selection: resolve slash-form aliases before provider/model parsing and keep alias-resolved primary models subject to transient provider cooldowns, so cron and persisted sessions do not retry cooled-down raw aliases. Fixes #73573 and #73657. Thanks @akai-shuuichi and @hashslingers. - Agents/Claude CLI: reuse already-cached macOS Keychain credentials for no-prompt Claude credential reads, so doctor/runtime checks do not miss fresh interactive Claude auth. Fixes #73682. Thanks @RyanSandoval. - Agents/transcripts: strip empty assistant text blocks while preserving valid text, images, and signatures, so Anthropic-style providers no longer reject sanitized transcript turns. Fixes #73640. Thanks @jowhee327. diff --git a/docs/channels/troubleshooting.md b/docs/channels/troubleshooting.md index aa7d09caf01..cdd4d0be642 100644 --- a/docs/channels/troubleshooting.md +++ b/docs/channels/troubleshooting.md @@ -31,12 +31,12 @@ Healthy baseline: ### WhatsApp failure signatures -| Symptom | Fastest check | Fix | -| ------------------------------- | --------------------------------------------------- | -------------------------------------------------------- | -| Connected but no DM replies | `openclaw pairing list whatsapp` | Approve sender or switch DM policy/allowlist. | -| Group messages ignored | Check `requireMention` + mention patterns in config | Mention the bot or relax mention policy for that group. | -| QR login times out with 408 | Check gateway `HTTPS_PROXY` / `HTTP_PROXY` env | Set a reachable proxy; use `NO_PROXY` only for bypasses. | -| Random disconnect/relogin loops | `openclaw channels status --probe` + logs | Re-login and verify credentials directory is healthy. | +| Symptom | Fastest check | Fix | +| ------------------------------- | --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | +| Connected but no DM replies | `openclaw pairing list whatsapp` | Approve sender or switch DM policy/allowlist. | +| Group messages ignored | Check `requireMention` + mention patterns in config | Mention the bot or relax mention policy for that group. | +| QR login times out with 408 | Check gateway `HTTPS_PROXY` / `HTTP_PROXY` env | Set a reachable proxy; use `NO_PROXY` only for bypasses. | +| Random disconnect/relogin loops | `openclaw channels status --probe` + logs | Recent reconnects are flagged even when currently connected; watch logs, restart the gateway, then relink if flapping continues. | Full troubleshooting: [WhatsApp troubleshooting](/channels/whatsapp#troubleshooting) diff --git a/extensions/whatsapp/src/status-issues.test.ts b/extensions/whatsapp/src/status-issues.test.ts index 9ad9d509400..43d76ebf508 100644 --- a/extensions/whatsapp/src/status-issues.test.ts +++ b/extensions/whatsapp/src/status-issues.test.ts @@ -84,4 +84,54 @@ describe("collectWhatsAppStatusIssues", () => { }), ]); }); + + it("reports recently reconnected accounts even when the socket is currently healthy", () => { + const issues = collectWhatsAppStatusIssues([ + { + accountId: "default", + enabled: true, + linked: true, + running: true, + connected: true, + reconnectAttempts: 3, + healthState: "healthy", + lastDisconnect: { + at: Date.now() - 2 * 60_000, + status: 408, + error: "status=408 Request Time-out Connection was lost", + }, + }, + ]); + + expect(issues).toEqual([ + expect.objectContaining({ + channel: "whatsapp", + accountId: "default", + kind: "runtime", + message: + "Linked but recently reconnected (reconnectAttempts=3): status=408 Request Time-out Connection was lost", + }), + ]); + }); + + it("does not report old reconnect history after a stable healthy period", () => { + const issues = collectWhatsAppStatusIssues([ + { + accountId: "default", + enabled: true, + linked: true, + running: true, + connected: true, + reconnectAttempts: 1, + healthState: "healthy", + lastDisconnect: { + at: Date.now() - 60 * 60_000, + status: 408, + error: "old disconnect", + }, + }, + ]); + + expect(issues).toEqual([]); + }); }); diff --git a/extensions/whatsapp/src/status-issues.ts b/extensions/whatsapp/src/status-issues.ts index c5bdc41fcd9..1511ca10dfe 100644 --- a/extensions/whatsapp/src/status-issues.ts +++ b/extensions/whatsapp/src/status-issues.ts @@ -17,11 +17,14 @@ type WhatsAppAccountStatus = { connected?: unknown; running?: unknown; reconnectAttempts?: unknown; + lastDisconnect?: unknown; lastInboundAt?: unknown; lastError?: unknown; healthState?: unknown; }; +const RECENT_DISCONNECT_WARNING_WINDOW_MS = 15 * 60 * 1000; + function readWhatsAppAccountStatus(value: ChannelAccountSnapshot): WhatsAppAccountStatus | null { if (!isRecord(value)) { return null; @@ -34,12 +37,34 @@ function readWhatsAppAccountStatus(value: ChannelAccountSnapshot): WhatsAppAccou connected: value.connected, running: value.running, reconnectAttempts: value.reconnectAttempts, + lastDisconnect: value.lastDisconnect, lastInboundAt: value.lastInboundAt, lastError: value.lastError, healthState: value.healthState, }; } +function readLastDisconnect(value: unknown): { at: number | null; error?: string } | null { + if (typeof value === "string") { + const error = asString(value); + return error ? { at: null, error } : null; + } + if (!isRecord(value)) { + return null; + } + return { + at: typeof value.at === "number" ? value.at : null, + error: asString(value.error), + }; +} + +function isRecentDisconnect(disconnect: { at: number | null } | null, now = Date.now()): boolean { + if (disconnect?.at == null) { + return false; + } + return now - disconnect.at <= RECENT_DISCONNECT_WARNING_WINDOW_MS; +} + export function collectWhatsAppStatusIssues( accounts: ChannelAccountSnapshot[], ): ChannelStatusIssue[] { @@ -55,7 +80,8 @@ export function collectWhatsAppStatusIssues( typeof account.reconnectAttempts === "number" ? account.reconnectAttempts : null; const lastInboundAt = typeof account.lastInboundAt === "number" ? account.lastInboundAt : null; - const lastError = asString(account.lastError); + const lastDisconnect = readLastDisconnect(account.lastDisconnect); + const lastError = asString(account.lastError) ?? lastDisconnect?.error; const healthState = asString(account.healthState); if (statusState === "unstable") { @@ -127,6 +153,24 @@ export function collectWhatsAppStatusIssues( return; } + if ( + linked && + running && + connected && + reconnectAttempts != null && + reconnectAttempts > 0 && + isRecentDisconnect(lastDisconnect) + ) { + issues.push({ + channel: "whatsapp", + accountId, + kind: "runtime", + message: `Linked but recently reconnected (reconnectAttempts=${reconnectAttempts})${lastError ? `: ${lastError}` : "."}`, + fix: `Watch: ${formatCliCommand("openclaw logs --follow")} and run ${formatCliCommand("openclaw channels status --probe")} if disconnects continue. If it keeps flapping, restart the gateway or relink via channels login.`, + }); + return; + } + if (running && !connected) { issues.push({ channel: "whatsapp", diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index 595a278d226..2a5188684d5 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -1706,14 +1706,11 @@ export const chatHandlers: GatewayRequestHandlers = { } let thinkingLevel = entry?.thinkingLevel; if (!thinkingLevel) { - const loadedCatalog = await context.loadGatewayModelCatalog().catch(() => undefined); - const modelCatalog = Array.isArray(loadedCatalog) ? loadedCatalog : undefined; thinkingLevel = resolveGatewaySessionThinkingDefault({ cfg, agentId: sessionAgentId, provider: resolvedSessionModel.provider, model: resolvedSessionModel.model, - modelCatalog, }); } const verboseLevel = entry?.verboseLevel ?? cfg.agents?.defaults?.verboseDefault; diff --git a/src/gateway/server.chat.gateway-server-chat-b.test.ts b/src/gateway/server.chat.gateway-server-chat-b.test.ts index 7427ce08cfc..9cbfa9330ff 100644 --- a/src/gateway/server.chat.gateway-server-chat-b.test.ts +++ b/src/gateway/server.chat.gateway-server-chat-b.test.ts @@ -135,6 +135,74 @@ async function prepareMainHistoryHarness(params: { } describe("gateway server chat", () => { + test("chat.history does not wait for model catalog discovery to return history", async () => { + const sessionDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-gw-")); + try { + testState.sessionStorePath = path.join(sessionDir, "sessions.json"); + testState.agentConfig = { + model: { primary: "test-provider/slow-catalog-model" }, + }; + await writeSessionStore({ + entries: { + main: { + sessionId: "sess-main", + modelProvider: "test-provider", + model: "slow-catalog-model", + updatedAt: Date.now(), + }, + }, + }); + const responses: Array<{ ok: boolean; payload?: unknown; error?: unknown }> = []; + const context = { + loadGatewayModelCatalog: vi.fn( + async () => { + throw new Error("model catalog should not load for chat.history"); + }, + ), + logGateway: { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }, + } as unknown as GatewayRequestContext; + const { chatHandlers } = await import("./server-methods/chat.js"); + + await chatHandlers["chat.history"]({ + req: { + type: "req", + id: "history-no-catalog", + method: "chat.history", + params: { sessionKey: "main" }, + }, + params: { sessionKey: "main" }, + client: null, + isWebchatConnect: () => false, + respond: ((ok, payload, error) => { + responses.push({ ok, payload, error }); + }) as RespondFn, + context, + }); + + expect(context.loadGatewayModelCatalog).not.toHaveBeenCalled(); + expect(responses).toEqual([ + expect.objectContaining({ + ok: true, + payload: expect.objectContaining({ + sessionKey: "main", + sessionId: "sess-main", + messages: expect.any(Array), + }), + }), + ]); + } finally { + clearConfigCache(); + testState.agentConfig = undefined; + testState.sessionStorePath = undefined; + await fs.rm(sessionDir, { recursive: true, force: true }); + } + }); + test("chat.send returns in_flight when duplicate attachment send wins parsing race", async () => { const sessionDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-gw-")); const dispatchRelease = createDeferred();