diff --git a/docs/cli/logs.md b/docs/cli/logs.md index 336aad31352..55155fc19ac 100644 --- a/docs/cli/logs.md +++ b/docs/cli/logs.md @@ -57,6 +57,7 @@ openclaw logs --url ws://127.0.0.1:18789 --token "$OPENCLAW_GATEWAY_TOKEN" - Use `--local-time` to render timestamps in your local timezone. - If the implicit local loopback Gateway asks for pairing, closes during connect, or times out before `logs.tail` answers, `openclaw logs` falls back to the configured Gateway file log automatically. Explicit `--url` targets do not use this fallback. +- When using `--follow`, transient gateway disconnects (WebSocket close, timeout, connection drop) trigger automatic reconnection with exponential backoff: up to 8 consecutive retries, with the delay between attempts capped at 30 s. The retry count resets after each successful fetch. A warning is printed to stderr on each retry. Non-recoverable errors (auth failure, pairing required, bad configuration) still exit immediately. ## Related diff --git a/src/cli/logs-cli.test.ts b/src/cli/logs-cli.test.ts index 133f392e172..fd9fe673e0d 100644 --- a/src/cli/logs-cli.test.ts +++ b/src/cli/logs-cli.test.ts @@ -65,6 +65,10 @@ vi.mock("../logging/log-tail.js", () => ({ ) => readConfiguredLogTail(...args), })); +vi.mock("../infra/backoff.js", () => ({ + computeBackoff: vi.fn().mockReturnValue(0), +})); + vi.mock("./gateway-rpc.js", async () => { const actual = await vi.importActual("./gateway-rpc.js"); return { @@ -271,6 +275,147 @@ describe("logs cli", () => { expect(stderrWrites.join("")).toContain("Local Gateway RPC unavailable"); }); + describe("--follow retry behavior", () => { + it("uses local fallback (not retry warning) for loopback close errors in --follow mode", async () => { + // Loopback close errors are absorbed by shouldUseLocalLogsFallback inside fetchLogs — + // they never reach the retry path, so no "gateway disconnected" warning is emitted. 
+ callGatewayFromCli.mockRejectedValueOnce( + new GatewayTransportError({ + kind: "closed", + code: 1006, + reason: "abnormal closure", + connectionDetails: { + url: "ws://127.0.0.1:18789", + urlSource: "local loopback", + message: "", + }, + message: "gateway closed (1006 abnormal closure): abnormal closure", + }), + ); + readConfiguredLogTail.mockResolvedValueOnce({ + file: "/tmp/openclaw.log", + cursor: 5, + lines: ["local fallback line"], + truncated: false, + reset: false, + }); + + const stderrWrites = captureStderrWrites(); + const stdoutWrites = captureStdoutWrites(); + const exitSpy = vi.spyOn(process, "exit").mockImplementation(() => undefined as never); + + await runLogsCli(["logs", "--follow"]); + + expect(stderrWrites.join("")).toContain("Local Gateway RPC unavailable"); + expect(stderrWrites.join("")).not.toContain("gateway disconnected"); + expect(stdoutWrites.join("")).toContain("local fallback line"); + expect(exitSpy).toHaveBeenCalledWith(1); + }); + + it("exits after exhausting max retries in --follow mode with explicit URL", async () => { + // Explicit --url bypasses shouldUseLocalLogsFallback so close errors reach the retry path. + // initial attempt + 8 retries = 9 total calls before fatal exit. + const closeError = new GatewayTransportError({ + kind: "closed", + code: 1006, + reason: "abnormal closure", + connectionDetails: { + url: "ws://127.0.0.1:18789", + urlSource: "cli", + message: "", + }, + message: "gateway closed (1006 abnormal closure): abnormal closure", + }); + for (let i = 0; i <= 8; i += 1) { + callGatewayFromCli.mockRejectedValueOnce(closeError); + } + + const stderrWrites = captureStderrWrites(); + const exitSpy = vi.spyOn(process, "exit").mockImplementation(() => undefined as never); + + await runLogsCli(["logs", "--follow", "--url", "ws://127.0.0.1:18789"]); + + expect((stderrWrites.join("").match(/gateway disconnected/g) ?? 
[]).length).toBe(8); + expect(stderrWrites.join("")).toContain("Gateway not reachable"); + expect(exitSpy).toHaveBeenCalledWith(1); + }); + + it("retries on transient close errors in --follow mode with explicit URL (no local fallback)", async () => { + callGatewayFromCli + .mockRejectedValueOnce( + new GatewayTransportError({ + kind: "closed", + code: 1006, + reason: "abnormal closure", + connectionDetails: { + url: "ws://remote.example.com:18789", + urlSource: "cli", + message: "", + }, + message: "gateway closed (1006 abnormal closure): abnormal closure", + }), + ) + .mockResolvedValueOnce({ + file: "/tmp/openclaw.log", + cursor: 10, + lines: ["line from remote"], + }); + + const stderrWrites = captureStderrWrites(); + const stdoutWrites = captureStdoutWrites(); + const exitSpy = vi.spyOn(process, "exit").mockImplementation(() => undefined as never); + + await runLogsCli(["logs", "--follow", "--url", "ws://remote.example.com:18789"]); + + expect(readConfiguredLogTail).not.toHaveBeenCalled(); + expect(stderrWrites.join("")).toContain("gateway disconnected"); + expect(stdoutWrites.join("")).toContain("line from remote"); + expect(exitSpy).toHaveBeenCalledWith(1); + }); + + it("exits immediately on pairing-required close errors in --follow mode with explicit URL", async () => { + callGatewayFromCli.mockRejectedValueOnce( + new GatewayTransportError({ + kind: "closed", + code: 1008, + reason: "pairing required", + connectionDetails: { url: "ws://127.0.0.1:18789", urlSource: "cli", message: "" }, + message: "gateway closed (1008 policy violation): pairing required", + }), + ); + + const stderrWrites = captureStderrWrites(); + const exitSpy = vi.spyOn(process, "exit").mockImplementation(() => undefined as never); + + await runLogsCli(["logs", "--follow", "--url", "ws://127.0.0.1:18789"]); + + expect(stderrWrites.join("")).not.toContain("gateway disconnected"); + expect(stderrWrites.join("")).toContain("Gateway not reachable"); + expect(exitSpy).toHaveBeenCalledWith(1); 
+ }); + + it("exits immediately on app-defined auth errors (4xxx) in --follow mode with explicit URL", async () => { + callGatewayFromCli.mockRejectedValueOnce( + new GatewayTransportError({ + kind: "closed", + code: 4001, + reason: "unauthorized", + connectionDetails: { url: "ws://127.0.0.1:18789", urlSource: "cli", message: "" }, + message: "gateway closed (4001 unauthorized): unauthorized", + }), + ); + + const stderrWrites = captureStderrWrites(); + const exitSpy = vi.spyOn(process, "exit").mockImplementation(() => undefined as never); + + await runLogsCli(["logs", "--follow", "--url", "ws://127.0.0.1:18789"]); + + expect(stderrWrites.join("")).not.toContain("gateway disconnected"); + expect(stderrWrites.join("")).toContain("Gateway not reachable"); + expect(exitSpy).toHaveBeenCalledWith(1); + }); + }); + it("does not use local fallback for explicit Gateway URLs", async () => { callGatewayFromCli.mockRejectedValueOnce( new GatewayTransportError({ diff --git a/src/cli/logs-cli.ts b/src/cli/logs-cli.ts index 5a967d84b8a..d2c3e539625 100644 --- a/src/cli/logs-cli.ts +++ b/src/cli/logs-cli.ts @@ -7,6 +7,7 @@ import { } from "../gateway/call.js"; import { isLoopbackHost } from "../gateway/net.js"; import { readConnectPairingRequiredMessage } from "../gateway/protocol/connect-error-details.js"; +import { computeBackoff } from "../infra/backoff.js"; import { formatErrorMessage } from "../infra/errors.js"; import { readConfiguredLogTail } from "../logging/log-tail.js"; import { parseLogLine } from "../logging/parse-log-line.js"; @@ -146,6 +147,29 @@ function isPlainGatewayRequestTimeoutError(message: string): boolean { return /^gateway timeout after \d+ms\b/u.test(message); } +const MAX_FOLLOW_RETRIES = 8; + +const FOLLOW_BACKOFF_POLICY = { initialMs: 1_000, maxMs: 30_000, factor: 2, jitter: 0.2 }; + +// Returns true only for transport-level disconnects that are worth retrying. 
+// Auth errors (4xxx), policy violations (1008), and pairing-required messages are +// non-recoverable without user action and must not loop. +function isTransientFollowError(error: unknown): boolean { + if (isGatewayTransportError(error)) { + if (error.kind === "timeout") { + return true; + } + const code = error.code ?? 0; + // 1008 = policy violation (pairing required); 4xxx = app-defined (auth, rate-limit) + return code !== 1008 && !(code >= 4000 && code <= 4999); + } + const message = normalizeLowercaseStringOrEmpty(normalizeErrorMessage(error)); + if (readConnectPairingRequiredMessage(message)) { + return false; + } + return isPlainGatewayRequestCloseError(message) || isPlainGatewayRequestTimeoutError(message); +} + export function formatLogTimestamp( value?: string, mode: "pretty" | "plain" = "plain", @@ -306,6 +330,7 @@ export function registerLogsCli(program: Command) { const localTime = Boolean(opts.localTime) || (!!process.env.TZ && isValidTimeZone(process.env.TZ)); + let followRetryAttempt = 0; while (true) { let payload: LogsTailPayload; // Show progress spinner only on first fetch, not during follow polling @@ -313,6 +338,23 @@ export function registerLogsCli(program: Command) { try { payload = await fetchLogs(opts, cursor, showProgress); } catch (err) { + if (opts.follow && followRetryAttempt < MAX_FOLLOW_RETRIES && isTransientFollowError(err)) { + followRetryAttempt += 1; + const backoffMs = computeBackoff(FOLLOW_BACKOFF_POLICY, followRetryAttempt); + if ( + !errorLine( + colorize( + rich, + theme.warn, + `[logs] gateway disconnected, reconnecting in ${Math.round(backoffMs / 1_000)}s...`, + ), + ) + ) { + return; + } + await delay(backoffMs); + continue; + } await emitGatewayError( err, opts, @@ -324,6 +366,7 @@ export function registerLogsCli(program: Command) { process.exit(1); return; } + followRetryAttempt = 0; const lines = Array.isArray(payload.lines) ? payload.lines : []; if (jsonMode) { if (first) {