diff --git a/CHANGELOG.md b/CHANGELOG.md index e4e45128736..c341a186f56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,9 +72,13 @@ Docs: https://docs.openclaw.ai - Doctor/gateway: report recent supervisor restart handoffs in `openclaw doctor --deep`, using the installed service environment when available so service-managed clean exits are visible in guided diagnostics. Thanks @shakkernerd. - Gateway/status: show recent supervisor restart handoffs in `openclaw gateway status --deep`, including JSON details, so clean service-managed restarts are reported as restart handoffs instead of opaque stopped-service diagnostics. Thanks @shakkernerd. - Providers/Fireworks: expose Kimi models as thinking-off-only and keep K2.5/K2.6 requests on `thinking: disabled`, so manual model switches do not send Fireworks-rejected `reasoning*` parameters. Refs #74289. Thanks @frankekn. +- WhatsApp responsiveness: stop only verified stale local TUI clients when they degrade the Gateway event loop and delay replies. Thanks @vincentkoc. - Video generation: wait up to 20 minutes for slow fal/MiniMax queue-backed jobs, stop forwarding unsupported Google Veo generated-audio options, and normalize MiniMax `720P` requests to its supported `768P` resolution with the usual override warning/details instead of failing fallback. - Video generation: accept provider-specific aspect-ratio and resolution hints at the tool boundary, normalize `720P` to MiniMax's supported `768P`, and stop sending Google `generateAudio` on Gemini video requests so provider fallback can recover from model-specific parameter differences. Thanks @vincentkoc. - OpenAI/Google Meet: fail realtime voice connection attempts when the socket closes before `session.updated`, avoiding stuck Meet joins waiting on a bridge that never became ready. Thanks @vincentkoc. 
+- Hooks/session-memory: run reset memory capture off the command reply path and make model-generated memory filename slugs opt-in with `llmSlug: true`, so `/new` and `/reset` no longer block WhatsApp and other message-channel reset replies on hook housekeeping or a nested model call. Thanks @vincentkoc. +- CLI/gateway: pause non-TTY stdin after full CLI command completion and stop `openclaw agent` from falling back to embedded mode after gateway request/auth failures, so parent help commands exit cleanly and scoped delivery probes surface the real Gateway error immediately. Thanks @vincentkoc. +- Gateway/model catalog: cache empty read-only model catalog results until reload, so TUI and control-plane refresh loops cannot hammer plugin metadata reads when no usable models are currently discovered. Thanks @vincentkoc. - Google Meet: fork the caller's current agent transcript into agent-mode meeting consultant sessions, so Meet replies inherit the context from the tool call that joined the meeting. - Google Meet: log the concrete agent-mode TTS provider, model, voice, output format, and sample rate after speech synthesis, so Meet logs show which voice backend spoke each reply. - Google Meet: log the resolved audio provider model when starting Chrome and paired-node Meet talk-back bridges, so agent-mode joins show the STT model and bidi joins show the realtime voice model. diff --git a/docs/automation/hooks.md b/docs/automation/hooks.md index bb38d5818fa..ff04a87c36c 100644 --- a/docs/automation/hooks.md +++ b/docs/automation/hooks.md @@ -178,7 +178,7 @@ openclaw hooks enable ### session-memory details -Extracts the last 15 user/assistant messages, generates a descriptive filename slug via LLM, and saves to `/memory/YYYY-MM-DD-slug.md` using the host local date. Requires `workspace.dir` to be configured. +Extracts the last 15 user/assistant messages and saves to `/memory/YYYY-MM-DD-HHMM.md` using the host local date. 
Memory capture runs in the background so `/new` and `/reset` acknowledgements are not delayed by transcript reads or optional slug generation. Set `hooks.internal.entries.session-memory.llmSlug: true` to generate descriptive filename slugs with the configured model. Requires `workspace.dir` to be configured. diff --git a/docs/channels/troubleshooting.md b/docs/channels/troubleshooting.md index 88095974663..db30bc0af99 100644 --- a/docs/channels/troubleshooting.md +++ b/docs/channels/troubleshooting.md @@ -31,12 +31,13 @@ Healthy baseline: ### WhatsApp failure signatures -| Symptom | Fastest check | Fix | -| ------------------------------- | --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | -| Connected but no DM replies | `openclaw pairing list whatsapp` | Approve sender or switch DM policy/allowlist. | -| Group messages ignored | Check `requireMention` + mention patterns in config | Mention the bot or relax mention policy for that group. | -| QR login times out with 408 | Check gateway `HTTPS_PROXY` / `HTTP_PROXY` env | Set a reachable proxy; use `NO_PROXY` only for bypasses. | -| Random disconnect/relogin loops | `openclaw channels status --probe` + logs | Recent reconnects are flagged even when currently connected; watch logs, restart the gateway, then relink if flapping continues. | +| Symptom | Fastest check | Fix | +| ----------------------------------- | --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | +| Connected but no DM replies | `openclaw pairing list whatsapp` | Approve sender or switch DM policy/allowlist. | +| Group messages ignored | Check `requireMention` + mention patterns in config | Mention the bot or relax mention policy for that group. 
| +| QR login times out with 408 | Check gateway `HTTPS_PROXY` / `HTTP_PROXY` env | Set a reachable proxy; use `NO_PROXY` only for bypasses. | +| Random disconnect/relogin loops | `openclaw channels status --probe` + logs | Recent reconnects are flagged even when currently connected; watch logs, restart the gateway, then relink if flapping continues. | +| Replies arrive seconds/minutes late | `openclaw doctor --fix` | Doctor stops verified stale local TUI clients when they are degrading the Gateway event loop. | Full troubleshooting: [WhatsApp troubleshooting](/channels/whatsapp#troubleshooting) diff --git a/docs/cli/doctor.md b/docs/cli/doctor.md index 8d6f5d182ad..f0b34e7fdd2 100644 --- a/docs/cli/doctor.md +++ b/docs/cli/doctor.md @@ -45,6 +45,7 @@ Notes: - State integrity checks now detect orphan transcript files in the sessions directory. Archiving them as `.deleted.` requires an interactive confirmation; `--fix`, `--yes`, and headless runs leave them in place. - Doctor also scans `~/.openclaw/cron/jobs.json` (or `cron.store`) for legacy cron job shapes and can rewrite them in place before the scheduler has to auto-normalize them at runtime. - On Linux, doctor warns when the user's crontab still runs legacy `~/.openclaw/bin/ensure-whatsapp.sh`; that script is no longer maintained and can log false WhatsApp gateway outages when cron lacks the systemd user-bus environment. +- When WhatsApp is enabled, doctor checks for a degraded Gateway event loop with local `openclaw-tui` clients still running. `doctor --fix` stops only verified local TUI clients so WhatsApp replies are not queued behind stale TUI refresh loops. - Doctor cleans legacy plugin dependency staging state created by older OpenClaw versions. It also repairs missing downloadable plugins that are referenced by config, such as `plugins.entries`, configured channels, configured provider/search settings, or configured agent runtimes. 
During package updates, doctor skips package-manager plugin repair until the package swap is complete; rerun `openclaw doctor --fix` afterward if a configured plugin still needs recovery. If the download fails, doctor reports the install error and preserves the configured plugin entry for the next repair attempt. - Doctor repairs stale plugin config by removing missing plugin ids from `plugins.allow`/`plugins.entries`, plus matching dangling channel config, heartbeat targets, and channel model overrides when plugin discovery is healthy. - Doctor quarantines invalid plugin config by disabling the affected `plugins.entries.` entry and removing its invalid `config` payload. Gateway startup already skips only that bad plugin so other plugins and channels can keep running. diff --git a/docs/cli/hooks.md b/docs/cli/hooks.md index 67af210ec1b..c2c6c284ebe 100644 --- a/docs/cli/hooks.md +++ b/docs/cli/hooks.md @@ -282,7 +282,7 @@ Saves session context to memory when you issue `/new` or `/reset`. openclaw hooks enable session-memory ``` -**Output:** `~/.openclaw/workspace/memory/YYYY-MM-DD-slug.md` +**Output:** `~/.openclaw/workspace/memory/YYYY-MM-DD-HHMM.md` by default. Set `hooks.internal.entries.session-memory.llmSlug: true` for model-generated filename slugs. **See:** [session-memory documentation](/automation/hooks#session-memory) diff --git a/docs/gateway/doctor.md b/docs/gateway/doctor.md index 360a4653d34..bb9886d82c8 100644 --- a/docs/gateway/doctor.md +++ b/docs/gateway/doctor.md @@ -107,6 +107,7 @@ cat ~/.openclaw/openclaw.json - Matrix channel legacy state migration (in `--fix` / `--repair` mode). - Gateway runtime checks (service installed but not running; cached launchd label). - Channel status warnings (probed from the running gateway). + - WhatsApp responsiveness checks for degraded Gateway event-loop health with local TUI clients still running; `--fix` stops only verified local TUI clients. 
- Supervisor config audit (launchd/systemd/schtasks) with optional repair. - Embedded proxy environment cleanup for gateway services that captured shell `HTTP_PROXY` / `HTTPS_PROXY` / `NO_PROXY` values during install or update. - Gateway runtime best-practice checks (Node vs Bun, version-manager paths). diff --git a/src/cli/run-main.exit.test.ts b/src/cli/run-main.exit.test.ts index 50951fad425..6f8ab7e672b 100644 --- a/src/cli/run-main.exit.test.ts +++ b/src/cli/run-main.exit.test.ts @@ -248,6 +248,32 @@ describe("runCli exit behavior", () => { exitSpy.mockRestore(); }); + it("pauses non-tty stdin after full CLI command completion", async () => { + tryRouteCliMock.mockResolvedValueOnce(false); + const parseAsync = vi.fn().mockResolvedValueOnce(undefined); + buildProgramMock.mockReturnValueOnce({ + commands: [{ name: () => "channels", aliases: () => [] }], + parseAsync, + }); + const stdinTty = Object.getOwnPropertyDescriptor(process.stdin, "isTTY"); + Object.defineProperty(process.stdin, "isTTY", { configurable: true, value: false }); + const pauseSpy = vi.spyOn(process.stdin, "pause").mockImplementation(() => process.stdin); + + try { + await runCli(["node", "openclaw", "channels"]); + + expect(parseAsync).toHaveBeenCalledWith(["node", "openclaw", "channels"]); + expect(pauseSpy).toHaveBeenCalledTimes(1); + } finally { + pauseSpy.mockRestore(); + if (stdinTty) { + Object.defineProperty(process.stdin, "isTTY", stdinTty); + } else { + Reflect.deleteProperty(process.stdin, "isTTY"); + } + } + }); + it("emits the startup banner before gateway foreground fast-path startup", async () => { await runCli(["node", "openclaw", "gateway", "--force"]); diff --git a/src/cli/run-main.ts b/src/cli/run-main.ts index 7024d96c8f9..e4ee17de5b9 100644 --- a/src/cli/run-main.ts +++ b/src/cli/run-main.ts @@ -207,6 +207,18 @@ async function closeCliMemoryManagers(): Promise { } } +function pauseNonTtyStdinForCliExit(): void { + const stdin = process.stdin; + if (stdin.isTTY) { + return; 
+ } + try { + stdin.pause(); + } catch { + // Best-effort cleanup for command paths that only inspected stdin. + } +} + export function resolveMissingPluginCommandMessage( pluginId: string, config?: OpenClawConfig, @@ -680,6 +692,7 @@ export async function runCli(argv: string[] = process.argv) { } await stopStartedProxy(); await closeCliMemoryManagers(); + pauseNonTtyStdinForCliExit(); } } diff --git a/src/commands/agent-via-gateway.test.ts b/src/commands/agent-via-gateway.test.ts index cb574074db0..e0343f34538 100644 --- a/src/commands/agent-via-gateway.test.ts +++ b/src/commands/agent-via-gateway.test.ts @@ -101,6 +101,19 @@ function createGatewayTimeoutError() { }); } +function createGatewayClosedError() { + const err = new Error("gateway closed before response"); + err.name = "GatewayTransportError"; + return Object.assign(err, { + kind: "closed", + connectionDetails: { + url: "ws://127.0.0.1:18789", + urlSource: "local loopback", + message: "Gateway target: ws://127.0.0.1:18789", + }, + }); +} + vi.mock("../config/config.js", () => ({ getRuntimeConfig: loadConfig, loadConfig })); vi.mock("../gateway/call.js", () => ({ callGateway, @@ -222,7 +235,7 @@ describe("agentCliCommand", () => { it("falls back to embedded agent when gateway fails", async () => { await withTempStore(async () => { - callGateway.mockRejectedValue(new Error("gateway not connected")); + callGateway.mockRejectedValue(createGatewayClosedError()); mockLocalAgentReply(); await agentCliCommand({ message: "hi", to: "+1555" }, runtime); @@ -242,6 +255,25 @@ describe("agentCliCommand", () => { }); }); + it("does not fall back to embedded agent for gateway request errors", async () => { + await withTempStore(async () => { + callGateway.mockRejectedValue( + Object.assign(new Error("missing scope: operator.admin"), { + name: "GatewayClientRequestError", + gatewayCode: "INVALID_REQUEST", + }), + ); + + await expect(agentCliCommand({ message: "hi", to: "+1555" }, runtime)).rejects.toThrow( + "missing 
scope: operator.admin", + ); + + expect(callGateway).toHaveBeenCalledTimes(1); + expect(agentCommand).not.toHaveBeenCalled(); + expect(runtime.error).not.toHaveBeenCalledWith(expect.stringContaining("EMBEDDED FALLBACK")); + }); + }); + it("uses a fresh embedded session when gateway agent times out", async () => { await withTempStore(async () => { callGateway.mockRejectedValue(createGatewayTimeoutError()); @@ -311,7 +343,7 @@ describe("agentCliCommand", () => { it("passes fallback metadata into JSON embedded fallback output", async () => { await withTempStore(async () => { - callGateway.mockRejectedValue(new Error("gateway not connected")); + callGateway.mockRejectedValue(createGatewayClosedError()); agentCommand.mockImplementationOnce(async (opts, rt) => { expect(loggingState.forceConsoleToStderr).toBe(true); const resultMetaOverrides = ( @@ -399,7 +431,7 @@ describe("agentCliCommand", () => { it("forces bundle MCP cleanup on embedded fallback", async () => { await withTempStore(async () => { - callGateway.mockRejectedValue(new Error("gateway not connected")); + callGateway.mockRejectedValue(createGatewayClosedError()); mockLocalAgentReply(); await agentCliCommand({ message: "hi", to: "+1555" }, runtime); diff --git a/src/commands/agent-via-gateway.ts b/src/commands/agent-via-gateway.ts index 23965faa43b..05edc0029bd 100644 --- a/src/commands/agent-via-gateway.ts +++ b/src/commands/agent-via-gateway.ts @@ -105,6 +105,10 @@ function isGatewayAgentTimeoutError(err: unknown): boolean { return err instanceof Error && err.message.includes("gateway request timeout for agent"); } +function isGatewayAgentEmbeddedFallbackError(err: unknown): boolean { + return isGatewayTransportError(err); +} + function createGatewayTimeoutFallbackSessionId(): string { return `${GATEWAY_TIMEOUT_FALLBACK_SESSION_PREFIX}${randomUUID()}`; } @@ -256,6 +260,10 @@ export async function agentCliCommand(opts: AgentCliOpts, runtime: RuntimeEnv, d ); } + if (!isGatewayAgentEmbeddedFallbackError(err)) 
{ + throw err; + } + runtime.error?.( `EMBEDDED FALLBACK: Gateway agent failed; running embedded agent: ${String(err)}`, ); diff --git a/src/commands/doctor-gateway-health.test.ts b/src/commands/doctor-gateway-health.test.ts index b43bed03690..07846a4e8ab 100644 --- a/src/commands/doctor-gateway-health.test.ts +++ b/src/commands/doctor-gateway-health.test.ts @@ -29,7 +29,7 @@ describe("checkGatewayHealth", () => { await expect( checkGatewayHealth({ runtime: runtime as never, cfg, timeoutMs: 3000 }), - ).resolves.toEqual({ healthOk: true }); + ).resolves.toEqual({ healthOk: true, status: { ok: true } }); expect(callGateway).toHaveBeenNthCalledWith(1, { method: "status", @@ -55,7 +55,7 @@ describe("checkGatewayHealth", () => { expect(callGateway).toHaveBeenCalledTimes(1); expect(runtime.error).toHaveBeenCalledWith( - expect.stringContaining("Health check failed: Error: gateway timeout after 3000ms"), + expect.stringContaining("gateway timeout after 3000ms"), ); }); }); diff --git a/src/commands/doctor-gateway-health.ts b/src/commands/doctor-gateway-health.ts index 35c7279b13c..fac50a240aa 100644 --- a/src/commands/doctor-gateway-health.ts +++ b/src/commands/doctor-gateway-health.ts @@ -6,6 +6,7 @@ import { formatErrorMessage } from "../infra/errors.js"; import type { RuntimeEnv } from "../runtime.js"; import { note } from "../terminal/note.js"; import { formatHealthCheckFailure } from "./health-format.js"; +import type { StatusSummary } from "./status.types.js"; export type GatewayMemoryProbe = { checked: boolean; @@ -28,13 +29,14 @@ export async function checkGatewayHealth(params: { runtime: RuntimeEnv; cfg: OpenClawConfig; timeoutMs?: number; -}) { +}): Promise<{ healthOk: boolean; status?: StatusSummary }> { const gatewayDetails = buildGatewayConnectionDetails({ config: params.cfg }); const timeoutMs = typeof params.timeoutMs === "number" && params.timeoutMs > 0 ? 
params.timeoutMs : 10_000; let healthOk = false; + let status: StatusSummary | undefined; try { - await callGateway({ + status = await callGateway({ method: "status", params: { includeChannelSummary: false }, timeoutMs, @@ -77,7 +79,7 @@ export async function checkGatewayHealth(params: { } } - return { healthOk }; + return { healthOk, status }; } export async function probeGatewayMemoryStatus(params: { diff --git a/src/commands/doctor-whatsapp-responsiveness.test.ts b/src/commands/doctor-whatsapp-responsiveness.test.ts new file mode 100644 index 00000000000..82c1fe2c3b2 --- /dev/null +++ b/src/commands/doctor-whatsapp-responsiveness.test.ts @@ -0,0 +1,132 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; + +const noteMock = vi.hoisted(() => vi.fn()); +const spawnSyncMock = vi.hoisted(() => vi.fn()); + +vi.mock("node:child_process", async () => { + const { mockNodeChildProcessSpawnSync } = await import("openclaw/plugin-sdk/test-node-mocks"); + return mockNodeChildProcessSpawnSync(spawnSyncMock); +}); + +vi.mock("../terminal/note.js", () => ({ + note: noteMock, +})); + +const { listLocalTuiProcesses, noteWhatsappResponsivenessHealth, terminateLocalTuiProcesses } = + await import("./doctor-whatsapp-responsiveness.js"); + +describe("doctor WhatsApp responsiveness", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("lists only verified local TUI processes", () => { + spawnSyncMock.mockReturnValue({ + status: 0, + stdout: [ + " 101 openclaw-tui", + " 102 /usr/bin/node /usr/lib/node_modules/openclaw/dist/index.js gateway --port 18789", + " 103 openclaw channels", + " 104 openclaw tui --local", + ].join("\n"), + }); + + expect(listLocalTuiProcesses()).toEqual([ + { pid: 101, command: "openclaw-tui" }, + { pid: 104, command: "openclaw tui --local" }, + ]); + }); + + it("terminates stale local TUI processes with a kill fallback", async () => { + const alive = new Set([101]); + 
const signals: Array<[number, string | number]> = []; + const controller = { + kill: vi.fn((pid: number, signal: string | number) => { + signals.push([pid, signal]); + if (signal === "SIGKILL") { + alive.delete(pid); + return true; + } + if (signal === 0) { + if (alive.has(pid)) { + return true; + } + throw new Error("gone"); + } + return true; + }), + }; + + await expect( + terminateLocalTuiProcesses({ + processes: [{ pid: 101, command: "openclaw-tui" }], + controller, + graceMs: 0, + }), + ).resolves.toEqual({ stopped: [101], failed: [] }); + expect(signals).toEqual([ + [101, "SIGTERM"], + [101, 0], + [101, "SIGKILL"], + [101, 0], + ]); + }); + + it("warns and repairs local TUI pressure when WhatsApp is enabled and the gateway is degraded", async () => { + const terminate = vi.fn().mockResolvedValue({ stopped: [101], failed: [] }); + const cfg = { channels: { whatsapp: { enabled: true } } } as OpenClawConfig; + + await noteWhatsappResponsivenessHealth({ + cfg, + status: { + eventLoop: { + degraded: true, + reasons: ["event_loop_delay"], + intervalMs: 30_000, + delayP99Ms: 42, + delayMaxMs: 12_000, + utilization: 0.3, + cpuCoreRatio: 0.4, + }, + }, + shouldRepair: true, + listLocalTuiProcesses: () => [{ pid: 101, command: "openclaw-tui" }], + terminateLocalTuiProcesses: terminate, + }); + + expect(terminate).toHaveBeenCalledWith({ + processes: [{ pid: 101, command: "openclaw-tui" }], + }); + expect(noteMock).toHaveBeenCalledWith( + expect.stringContaining("Stopped local TUI clients: 101"), + "WhatsApp responsiveness", + ); + }); + + it("does not treat generic model routing as a WhatsApp-only issue", async () => { + const cfg = { + channels: { whatsapp: { enabled: true } }, + agents: { defaults: { model: { primary: "openai-codex/gpt-5.5" } } }, + } as OpenClawConfig; + + await noteWhatsappResponsivenessHealth({ + cfg, + status: { + eventLoop: { + degraded: false, + reasons: [], + intervalMs: 1, + delayP99Ms: 0, + delayMaxMs: 0, + utilization: 0, + cpuCoreRatio: 0, 
+ }, + }, + shouldRepair: true, + listLocalTuiProcesses: () => [], + }); + + expect(noteMock).not.toHaveBeenCalled(); + }); +}); diff --git a/src/commands/doctor-whatsapp-responsiveness.ts b/src/commands/doctor-whatsapp-responsiveness.ts new file mode 100644 index 00000000000..92f20262664 --- /dev/null +++ b/src/commands/doctor-whatsapp-responsiveness.ts @@ -0,0 +1,177 @@ +import { spawnSync } from "node:child_process"; +import { formatCliCommand } from "../cli/command-format.js"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; +import { note } from "../terminal/note.js"; +import type { StatusSummary } from "./status.types.js"; + +export type LocalTuiProcess = { + pid: number; + command: string; +}; + +type ProcessSignal = "SIGTERM" | "SIGKILL"; + +type ProcessController = { + kill: (pid: number, signal: ProcessSignal | 0) => boolean; +}; + +const LOCAL_TUI_CMD_RE = + /(?:^|\s)(?:openclaw-tui|openclaw\s+tui|openclaw\s+chat|openclaw\s+terminal)(?:\s|$)/; + +function parsePsPidLine(line: string): LocalTuiProcess | null { + const match = line.match(/^\s*(\d+)\s+(.+)$/); + if (!match) { + return null; + } + const pid = Number(match[1]); + if (!Number.isFinite(pid) || pid <= 0 || pid === process.pid) { + return null; + } + const command = match[2]?.trim() ?? 
""; + if (!LOCAL_TUI_CMD_RE.test(command)) { + return null; + } + return { pid, command }; +} + +export function listLocalTuiProcesses(): LocalTuiProcess[] { + if (process.platform === "win32") { + return []; + } + const ps = spawnSync("ps", ["-axo", "pid=,command="], { + encoding: "utf8", + timeout: 1000, + }); + if (ps.error || ps.status !== 0 || typeof ps.stdout !== "string") { + return []; + } + const seen = new Set(); + const processes: LocalTuiProcess[] = []; + for (const line of ps.stdout.split(/\r?\n/)) { + const proc = parsePsPidLine(line); + if (!proc || seen.has(proc.pid)) { + continue; + } + seen.add(proc.pid); + processes.push(proc); + } + return processes; +} + +function hasWhatsappEnabled(cfg: OpenClawConfig): boolean { + const whatsapp = cfg.channels?.whatsapp; + if (!whatsapp || whatsapp.enabled === false) { + return false; + } + const accounts = whatsapp.accounts; + if (accounts && Object.keys(accounts).length > 0) { + return Object.values(accounts).some((account) => account?.enabled !== false); + } + return true; +} + +function formatPidList(processes: LocalTuiProcess[]): string { + return processes.map((proc) => String(proc.pid)).join(", "); +} + +function isProcessAlive(controller: ProcessController, pid: number): boolean { + try { + controller.kill(pid, 0); + return true; + } catch { + return false; + } +} + +async function sleep(ms: number): Promise { + await new Promise((resolve) => setTimeout(resolve, ms)); +} + +export async function terminateLocalTuiProcesses(params: { + processes: LocalTuiProcess[]; + controller?: ProcessController; + graceMs?: number; +}): Promise<{ stopped: number[]; failed: number[] }> { + const controller = params.controller ?? process; + const graceMs = Math.max(0, params.graceMs ?? 500); + const stopped: number[] = []; + const failed: number[] = []; + + for (const proc of params.processes) { + try { + controller.kill(proc.pid, "SIGTERM"); + } catch { + // Already gone is success for this repair. 
+ } + } + if (graceMs > 0) { + await sleep(graceMs); + } + for (const proc of params.processes) { + if (!isProcessAlive(controller, proc.pid)) { + stopped.push(proc.pid); + continue; + } + try { + controller.kill(proc.pid, "SIGKILL"); + } catch { + // Already gone is still success. + } + if (isProcessAlive(controller, proc.pid)) { + failed.push(proc.pid); + } else { + stopped.push(proc.pid); + } + } + return { stopped, failed }; +} + +export async function noteWhatsappResponsivenessHealth(params: { + cfg: OpenClawConfig; + status?: Pick | null; + shouldRepair: boolean; + listLocalTuiProcesses?: () => LocalTuiProcess[]; + terminateLocalTuiProcesses?: typeof terminateLocalTuiProcesses; +}): Promise { + if (!hasWhatsappEnabled(params.cfg)) { + return; + } + + const warnings: string[] = []; + const tuiProcesses = (params.listLocalTuiProcesses ?? listLocalTuiProcesses)(); + const eventLoop = params.status?.eventLoop; + const gatewayDegraded = eventLoop?.degraded === true; + + if (gatewayDegraded && tuiProcesses.length > 0) { + warnings.push( + [ + "Gateway event loop is degraded while local TUI clients are running.", + "WhatsApp replies can queue behind TUI startup/session refresh work.", + `Local TUI pids: ${formatPidList(tuiProcesses)}`, + ].join("\n"), + ); + if (params.shouldRepair) { + const repair = await (params.terminateLocalTuiProcesses ?? 
terminateLocalTuiProcesses)({ + processes: tuiProcesses, + }); + const repairLines: string[] = []; + if (repair.stopped.length > 0) { + repairLines.push(`Stopped local TUI clients: ${repair.stopped.join(", ")}`); + } + if (repair.failed.length > 0) { + repairLines.push(`Could not stop local TUI clients: ${repair.failed.join(", ")}`); + } + if (repairLines.length > 0) { + warnings.push(repairLines.join("\n")); + } + } else { + warnings.push( + `Fix: close those TUI sessions, or run ${formatCliCommand("openclaw doctor --fix")}.`, + ); + } + } + + if (warnings.length > 0) { + note(warnings.join("\n\n"), "WhatsApp responsiveness"); + } +} diff --git a/src/flows/doctor-health-contributions.ts b/src/flows/doctor-health-contributions.ts index a6f6f1aed08..717fa4b7705 100644 --- a/src/flows/doctor-health-contributions.ts +++ b/src/flows/doctor-health-contributions.ts @@ -29,6 +29,7 @@ type DoctorHealthFlowContext = { env?: NodeJS.ProcessEnv; gatewayDetails?: ReturnType; healthOk?: boolean; + gatewayStatus?: import("../commands/status.types.js").StatusSummary; gatewayMemoryProbe?: Awaited>; }; @@ -493,12 +494,13 @@ async function runShellCompletionHealth(ctx: DoctorHealthFlowContext): Promise { const { checkGatewayHealth, probeGatewayMemoryStatus } = await import("../commands/doctor-gateway-health.js"); - const { healthOk } = await checkGatewayHealth({ + const { healthOk, status } = await checkGatewayHealth({ runtime: ctx.runtime, cfg: ctx.cfg, timeoutMs: ctx.options.nonInteractive === true ? 3000 : 10_000, }); ctx.healthOk = healthOk; + ctx.gatewayStatus = status; ctx.gatewayMemoryProbe = healthOk ? 
await probeGatewayMemoryStatus({ cfg: ctx.cfg, @@ -507,6 +509,16 @@ async function runGatewayHealthChecks(ctx: DoctorHealthFlowContext): Promise { + const { noteWhatsappResponsivenessHealth } = + await import("../commands/doctor-whatsapp-responsiveness.js"); + await noteWhatsappResponsivenessHealth({ + cfg: ctx.cfg, + status: ctx.gatewayStatus, + shouldRepair: ctx.prompter.shouldRepair, + }); +} + async function runMemorySearchHealthContribution(ctx: DoctorHealthFlowContext): Promise { const { maybeRepairMemoryRecallHealth, noteMemoryRecallHealth, noteMemorySearchHealth } = await import("../commands/doctor-memory-search.js"); @@ -743,6 +755,11 @@ export function resolveDoctorHealthContributions(): DoctorHealthContribution[] { label: "Gateway health", run: runGatewayHealthChecks, }), + createDoctorHealthContribution({ + id: "doctor:whatsapp-responsiveness", + label: "WhatsApp responsiveness", + run: runWhatsappResponsivenessHealth, + }), createDoctorHealthContribution({ id: "doctor:memory-search", label: "Memory search", diff --git a/src/gateway/server-model-catalog.test.ts b/src/gateway/server-model-catalog.test.ts index 6a4965b234e..d25695d1a39 100644 --- a/src/gateway/server-model-catalog.test.ts +++ b/src/gateway/server-model-catalog.test.ts @@ -76,7 +76,7 @@ describe("loadGatewayModelCatalog", () => { }); }); - it("does not cache an empty catalog so the next request retries", async () => { + it("caches an empty read-only catalog until reload marks it stale", async () => { const emptyCatalog: GatewayModelChoice[] = []; const freshCatalog = [model("gpt-5.5")]; const loadModelCatalog = vi @@ -88,9 +88,38 @@ describe("loadGatewayModelCatalog", () => { emptyCatalog, ); await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe( - freshCatalog, + emptyCatalog, ); + expect(loadModelCatalog).toHaveBeenCalledTimes(1); + + markGatewayModelCatalogStaleForReload(); + await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog 
})).resolves.toBe( + emptyCatalog, + ); + await vi.waitFor(() => expect(loadModelCatalog).toHaveBeenCalledTimes(2)); + await vi.waitFor(async () => { + await expect(loadGatewayModelCatalog({ getConfig, loadModelCatalog })).resolves.toBe( + freshCatalog, + ); + }); + }); + + it("does not cache an empty full catalog so the next all-model request retries", async () => { + const emptyCatalog: GatewayModelChoice[] = []; + const freshCatalog = [model("gpt-5.5")]; + const loadModelCatalog = vi + .fn() + .mockResolvedValueOnce(emptyCatalog) + .mockResolvedValueOnce(freshCatalog); + + await expect( + loadGatewayModelCatalog({ getConfig, loadModelCatalog, readOnly: false }), + ).resolves.toBe(emptyCatalog); + await expect( + loadGatewayModelCatalog({ getConfig, loadModelCatalog, readOnly: false }), + ).resolves.toBe(freshCatalog); + expect(loadModelCatalog).toHaveBeenCalledTimes(2); }); diff --git a/src/gateway/server-model-catalog.ts b/src/gateway/server-model-catalog.ts index 372abd95776..91674b15241 100644 --- a/src/gateway/server-model-catalog.ts +++ b/src/gateway/server-model-catalog.ts @@ -71,7 +71,7 @@ function startGatewayModelCatalogRefresh( const refresh = resolveLoadModelCatalog(params) .then((loadModelCatalog) => loadModelCatalog({ config, readOnly })) .then((catalog) => { - if (catalog.length > 0 && refreshGeneration === cache.staleGeneration) { + if ((readOnly || catalog.length > 0) && refreshGeneration === cache.staleGeneration) { cache.lastSuccessfulCatalog = catalog; cache.appliedGeneration = cache.staleGeneration; } @@ -105,10 +105,10 @@ export async function loadGatewayModelCatalog( ): Promise { const cache = resolveGatewayModelCatalogCache(params); const isStale = isGatewayModelCatalogStale(cache); - if (!isStale && cache.lastSuccessfulCatalog) { + if (!isStale && cache.lastSuccessfulCatalog !== null) { return cache.lastSuccessfulCatalog; } - if (isStale && cache.lastSuccessfulCatalog) { + if (isStale && cache.lastSuccessfulCatalog !== null) { if 
(!cache.inFlightRefresh) { void startGatewayModelCatalogRefresh(params).catch(() => undefined); } diff --git a/src/hooks/bundled/session-memory/HOOK.md b/src/hooks/bundled/session-memory/HOOK.md index 8130fc91047..918c9b4bd27 100644 --- a/src/hooks/bundled/session-memory/HOOK.md +++ b/src/hooks/bundled/session-memory/HOOK.md @@ -24,8 +24,8 @@ When you run `/new` or `/reset` to start a fresh session: 1. **Finds the previous session** - Uses the pre-reset session entry to locate the correct transcript 2. **Extracts conversation** - Reads the last N user/assistant messages from the session (default: 15, configurable) -3. **Generates descriptive slug** - Uses LLM to create a meaningful filename slug based on conversation content -4. **Saves to memory** - Creates a new file at `/memory/YYYY-MM-DD-slug.md` +3. **Chooses filename slug** - Uses a local timestamp by default, or an LLM-generated description when `llmSlug` is enabled +4. **Saves to memory** - Creates a new file at `/memory/YYYY-MM-DD-HHMM.md` by default without delaying the `/new` or `/reset` reply ## Output Format @@ -41,26 +41,30 @@ Memory files are created with the following format: ## Filename Examples -The LLM generates descriptive slugs based on your conversation: +Timestamp slugs are the default so `/new` and `/reset` stay fast on message channels: + +- `2026-01-16-1430.md` - Default local timestamp slug + +With `llmSlug: true`, the configured model can generate descriptive slugs based on your conversation: - `2026-01-16-vendor-pitch.md` - Discussion about vendor evaluation - `2026-01-16-api-design.md` - API architecture planning - `2026-01-16-bug-fix.md` - Debugging session -- `2026-01-16-1430.md` - Fallback local timestamp if slug generation fails ## Requirements - **Config**: `workspace.dir` must be set (automatically configured during setup) -The hook uses your configured LLM provider to generate slugs, so it works with any provider (Anthropic, OpenAI, etc.). 
+When `llmSlug` is enabled, the hook uses your configured LLM provider to generate slugs, so it works with any provider (Anthropic, OpenAI, etc.). ## Configuration The hook supports optional configuration: -| Option | Type | Default | Description | -| ---------- | ------ | ------- | --------------------------------------------------------------- | -| `messages` | number | 15 | Number of user/assistant messages to include in the memory file | +| Option | Type | Default | Description | +| ---------- | ------- | ------- | ------------------------------------------------------------------------------------------- | +| `messages` | number | 15 | Number of user/assistant messages to include in the memory file | +| `llmSlug` | boolean | false | Use your configured model to generate descriptive filename slugs instead of timestamp slugs | Example configuration: @@ -71,7 +75,8 @@ Example configuration: "entries": { "session-memory": { "enabled": true, - "messages": 25 + "messages": 25, + "llmSlug": true } } } @@ -82,8 +87,10 @@ Example configuration: The hook automatically: - Uses your workspace directory (`~/.openclaw/workspace` by default) -- Uses your configured LLM for slug generation -- Falls back to timestamp slugs if LLM is unavailable +- Uses timestamp slugs by default so `/new` and `/reset` stay fast on message channels +- Runs memory capture in the background so reset acknowledgements can return immediately +- Uses your configured LLM for slug generation only when `llmSlug` is `true` +- Falls back to timestamp slugs if LLM slug generation is unavailable ## Disabling diff --git a/src/hooks/bundled/session-memory/handler.test.ts b/src/hooks/bundled/session-memory/handler.test.ts index e0232122f02..d824d5c54cf 100644 --- a/src/hooks/bundled/session-memory/handler.test.ts +++ b/src/hooks/bundled/session-memory/handler.test.ts @@ -6,6 +6,7 @@ import type { OpenClawConfig } from "../../../config/config.js"; import { writeWorkspaceFile } from 
"../../../test-helpers/workspace.js"; import { withEnvAsync } from "../../../test-utils/env.js"; import { createHookEvent } from "../../hooks.js"; +import { generateSlugViaLLM } from "../../llm-slug-generator.js"; import { findPreviousSessionFile, getRecentSessionContent, @@ -18,6 +19,7 @@ vi.mock("../../llm-slug-generator.js", () => ({ })); let handler: typeof import("./handler.js").default; +let flushSessionMemoryWritesForTest: typeof import("./handler.js").flushSessionMemoryWritesForTest; let suiteWorkspaceRoot = ""; let workspaceCaseCounter = 0; @@ -29,7 +31,7 @@ async function createCaseWorkspace(prefix = "case"): Promise { } beforeAll(async () => { - ({ default: handler } = await import("./handler.js")); + ({ default: handler, flushSessionMemoryWritesForTest } = await import("./handler.js")); suiteWorkspaceRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-session-memory-")); }); @@ -93,6 +95,7 @@ async function runNewWithPreviousSessionEntry(params: { } await handler(event); + await flushSessionMemoryWritesForTest(); const memoryDir = path.join(params.tempDir, "memory"); const files = await fs.readdir(memoryDir); @@ -190,6 +193,16 @@ function expectMemoryConversation(params: { } } +async function waitUntil(condition: () => boolean, timeoutMs = 500): Promise { + const deadline = Date.now() + timeoutMs; + while (!condition()) { + if (Date.now() > deadline) { + throw new Error("condition was not met before timeout"); + } + await new Promise((resolve) => setTimeout(resolve, 5)); + } +} + describe("session-memory hook", () => { it("skips non-command events", async () => { const tempDir = await createCaseWorkspace("workspace"); @@ -237,6 +250,136 @@ describe("session-memory hook", () => { expect(memoryContent).toContain("assistant: 2+2 equals 4"); }); + it("does not call the model provider for a filename slug by default", async () => { + const sessionContent = createMockSessionContent([ + { role: "user", content: "Hello there" }, + { role: "assistant", 
content: "Hi! How can I help?" }, + ]); + + const generateSlug = vi.mocked(generateSlugViaLLM); + generateSlug.mockClear(); + + await withEnvAsync( + { + NODE_ENV: "production", + OPENCLAW_TEST_FAST: undefined, + VITEST: undefined, + }, + async () => { + const { files } = await runNewWithPreviousSession({ sessionContent }); + expect(files[0]).toMatch(/^\d{4}-\d{2}-\d{2}-\d{4}\.md$/); + }, + ); + + expect(generateSlug).not.toHaveBeenCalled(); + }); + + it("uses a model-generated filename slug only when explicitly enabled", async () => { + const sessionContent = createMockSessionContent([ + { role: "user", content: "What is 2+2?" }, + { role: "assistant", content: "2+2 equals 4" }, + ]); + + const generateSlug = vi.mocked(generateSlugViaLLM); + generateSlug.mockClear(); + generateSlug.mockResolvedValueOnce("simple-math"); + + await withEnvAsync( + { + NODE_ENV: "production", + OPENCLAW_TEST_FAST: undefined, + VITEST: undefined, + }, + async () => { + const { files } = await runNewWithPreviousSession({ + sessionContent, + cfg: (tempDir) => + ({ + agents: { defaults: { workspace: tempDir } }, + hooks: { + internal: { + entries: { + "session-memory": { + enabled: true, + llmSlug: true, + }, + }, + }, + }, + }) satisfies OpenClawConfig, + }); + expect(files).toEqual([expect.stringMatching(/^\d{4}-\d{2}-\d{2}-simple-math\.md$/)]); + }, + ); + + expect(generateSlug).toHaveBeenCalledTimes(1); + }); + + it("does not block reset command handling on opt-in model slug generation", async () => { + const tempDir = await createCaseWorkspace("workspace"); + const sessionsDir = path.join(tempDir, "sessions"); + await fs.mkdir(sessionsDir, { recursive: true }); + + const sessionFile = await writeWorkspaceFile({ + dir: sessionsDir, + name: "test-session.jsonl", + content: createMockSessionContent([ + { role: "user", content: "Investigate slow WhatsApp reset" }, + { role: "assistant", content: "Checking reset hooks" }, + ]), + }); + + let resolveSlug: ((slug: string | null) => void) | 
undefined; + const generateSlug = vi.mocked(generateSlugViaLLM); + generateSlug.mockClear(); + generateSlug.mockImplementationOnce( + () => + new Promise((resolve) => { + resolveSlug = resolve; + }), + ); + + await withEnvAsync( + { + NODE_ENV: "production", + OPENCLAW_TEST_FAST: undefined, + VITEST: undefined, + }, + async () => { + const event = createHookEvent("command", "new", "agent:main:main", { + cfg: { + agents: { defaults: { workspace: tempDir } }, + hooks: { + internal: { + entries: { + "session-memory": { + enabled: true, + llmSlug: true, + }, + }, + }, + }, + } satisfies OpenClawConfig, + previousSessionEntry: { + sessionId: "test-123", + sessionFile, + }, + }); + + const startedAt = Date.now(); + await handler(event); + expect(Date.now() - startedAt).toBeLessThan(100); + + await waitUntil(() => generateSlug.mock.calls.length === 1); + resolveSlug?.("slow-reset"); + await flushSessionMemoryWritesForTest(); + + const files = await fs.readdir(path.join(tempDir, "memory")); + expect(files).toEqual([expect.stringMatching(/^\d{4}-\d{2}-\d{2}-slow-reset\.md$/)]); + }, + ); + }); + it("creates memory file with session content on /reset command", async () => { const sessionContent = createMockSessionContent([ { role: "user", content: "Please reset and keep notes" }, diff --git a/src/hooks/bundled/session-memory/handler.ts b/src/hooks/bundled/session-memory/handler.ts index 647127d5fe2..3d0de6ec652 100644 --- a/src/hooks/bundled/session-memory/handler.ts +++ b/src/hooks/bundled/session-memory/handler.ts @@ -2,7 +2,7 @@ * Session memory hook handler * * Saves session context to memory when /new or /reset command is triggered - * Creates a new dated memory file with LLM-generated slug + * Creates a new dated memory file with a timestamp slug by default */ import fs from "node:fs/promises"; @@ -107,13 +107,13 @@ function resolveDisplaySessionKey(params: { /** * Save session context to memory when /new or /reset command is triggered */ -const saveSessionToMemory: 
HookHandler = async (event) => { - // Only trigger on reset/new commands - const isResetCommand = event.action === "new" || event.action === "reset"; - if (event.type !== "command" || !isResetCommand) { - return; - } +const pendingSessionMemoryWrites = new Set<Promise<void>>(); +export async function flushSessionMemoryWritesForTest(): Promise<void> { + await Promise.allSettled(pendingSessionMemoryWrites); +} + +async function saveSessionMemoryNow(event: Parameters<HookHandler>[0]): Promise<void> { try { log.debug("Hook triggered for reset/new command", { action: event.action }); @@ -142,7 +142,7 @@ const saveSessionToMemory: HookHandler = async (event) => { const localTimestamp = formatLocalSessionTimestamp(now); const dateStr = localTimestamp.date; - // Generate descriptive slug from session using LLM + // Generate descriptive slug from session when explicitly enabled // Prefer previousSessionEntry (old session before /new) over current (which may be empty) const sessionEntry = (context.previousSessionEntry || context.sessionEntry || {}) as Record< string, @@ -206,7 +206,7 @@ const saveSessionToMemory: HookHandler = async (event) => { process.env.VITEST === "true" || process.env.VITEST === "1" || process.env.NODE_ENV === "test"; - const allowLlmSlug = !isTestEnv && hookConfig?.llmSlug !== false; + const allowLlmSlug = !isTestEnv && hookConfig?.llmSlug === true; if (sessionContent && cfg && allowLlmSlug) { log.debug("Calling generateSlugViaLLM..."); @@ -277,6 +277,21 @@ const saveSessionToMemory: HookHandler = async (event) => { log.error("Failed to save session memory", { error: String(err) }); } } +} + +const saveSessionToMemory: HookHandler = (event) => { + // Only trigger on reset/new commands. This is silent housekeeping, so keep it + // off the command reply path. 
+ const isResetCommand = event.action === "new" || event.action === "reset"; + if (event.type !== "command" || !isResetCommand) { + return; + } + + const writePromise = saveSessionMemoryNow(event); + pendingSessionMemoryWrites.add(writePromise); + void writePromise.finally(() => { + pendingSessionMemoryWrites.delete(writePromise); + }); }; export default saveSessionToMemory;