* 'main' of https://github.com/openclaw/openclaw:
  fix(cli): auto-reconnect logs --follow on transient gateway disconnect #74782 (#75059)
This commit is contained in:
Vincent Koc
2026-05-03 02:20:38 -07:00
3 changed files with 189 additions and 0 deletions

View File

@@ -57,6 +57,7 @@ openclaw logs --url ws://127.0.0.1:18789 --token "$OPENCLAW_GATEWAY_TOKEN"
- Use `--local-time` to render timestamps in your local timezone.
- If the implicit local loopback Gateway asks for pairing, closes during connect, or times out before `logs.tail` answers, `openclaw logs` falls back to the configured Gateway file log automatically. Explicit `--url` targets do not use this fallback.
- When using `--follow`, transient gateway disconnects (WebSocket close, timeout, connection drop) trigger automatic reconnection with exponential backoff (up to 8 consecutive retries, capped at 30 s between attempts; the retry count resets after a successful fetch). A warning is printed to stderr on each retry. Non-recoverable errors (auth failure, pairing required, bad configuration) still exit immediately.
## Related

View File

@@ -65,6 +65,10 @@ vi.mock("../logging/log-tail.js", () => ({
) => readConfiguredLogTail(...args),
}));
// Stub the backoff helper so computeBackoff always returns 0; the --follow retry
// loop then computes a zero delay between reconnect attempts in these tests.
vi.mock("../infra/backoff.js", () => ({
computeBackoff: vi.fn().mockReturnValue(0),
}));
vi.mock("./gateway-rpc.js", async () => {
const actual = await vi.importActual<typeof import("./gateway-rpc.js")>("./gateway-rpc.js");
return {
@@ -271,6 +275,147 @@ describe("logs cli", () => {
expect(stderrWrites.join("")).toContain("Local Gateway RPC unavailable");
});
describe("--follow retry behavior", () => {
  // Replaces process.exit with a no-op spy so the CLI action can "exit" without
  // terminating the test runner; the spy is returned for assertions.
  const stubProcessExit = () =>
    vi.spyOn(process, "exit").mockImplementation(() => undefined as never);

  it("uses local fallback (not retry warning) for loopback close errors in --follow mode", async () => {
    // fetchLogs absorbs loopback close errors through shouldUseLocalLogsFallback, so the
    // retry path is never reached and no "gateway disconnected" warning is printed.
    const loopbackCloseError = new GatewayTransportError({
      kind: "closed",
      code: 1006,
      reason: "abnormal closure",
      connectionDetails: {
        url: "ws://127.0.0.1:18789",
        urlSource: "local loopback",
        message: "",
      },
      message: "gateway closed (1006 abnormal closure): abnormal closure",
    });
    callGatewayFromCli.mockRejectedValueOnce(loopbackCloseError);
    readConfiguredLogTail.mockResolvedValueOnce({
      file: "/tmp/openclaw.log",
      cursor: 5,
      lines: ["local fallback line"],
      truncated: false,
      reset: false,
    });
    const stderrChunks = captureStderrWrites();
    const stdoutChunks = captureStdoutWrites();
    const exitSpy = stubProcessExit();

    await runLogsCli(["logs", "--follow"]);

    const stderrText = stderrChunks.join("");
    expect(stderrText).toContain("Local Gateway RPC unavailable");
    expect(stderrText).not.toContain("gateway disconnected");
    expect(stdoutChunks.join("")).toContain("local fallback line");
    expect(exitSpy).toHaveBeenCalledWith(1);
  });

  it("exits after exhausting max retries in --follow mode with explicit URL", async () => {
    // An explicit --url skips shouldUseLocalLogsFallback, so close errors reach the retry path.
    // One initial attempt plus 8 retries means 9 rejected calls before the fatal exit.
    const totalRejectedCalls = 9;
    const abnormalClosure = new GatewayTransportError({
      kind: "closed",
      code: 1006,
      reason: "abnormal closure",
      connectionDetails: {
        url: "ws://127.0.0.1:18789",
        urlSource: "cli",
        message: "",
      },
      message: "gateway closed (1006 abnormal closure): abnormal closure",
    });
    for (let call = 0; call < totalRejectedCalls; call += 1) {
      callGatewayFromCli.mockRejectedValueOnce(abnormalClosure);
    }
    const stderrChunks = captureStderrWrites();
    const exitSpy = stubProcessExit();

    await runLogsCli(["logs", "--follow", "--url", "ws://127.0.0.1:18789"]);

    const stderrText = stderrChunks.join("");
    const retryWarnings = stderrText.match(/gateway disconnected/g) ?? [];
    expect(retryWarnings.length).toBe(8);
    expect(stderrText).toContain("Gateway not reachable");
    expect(exitSpy).toHaveBeenCalledWith(1);
  });

  it("retries on transient close errors in --follow mode with explicit URL (no local fallback)", async () => {
    // First call fails with a transient close, second call succeeds with one log line.
    const remoteCloseError = new GatewayTransportError({
      kind: "closed",
      code: 1006,
      reason: "abnormal closure",
      connectionDetails: {
        url: "ws://remote.example.com:18789",
        urlSource: "cli",
        message: "",
      },
      message: "gateway closed (1006 abnormal closure): abnormal closure",
    });
    callGatewayFromCli.mockRejectedValueOnce(remoteCloseError);
    callGatewayFromCli.mockResolvedValueOnce({
      file: "/tmp/openclaw.log",
      cursor: 10,
      lines: ["line from remote"],
    });
    const stderrChunks = captureStderrWrites();
    const stdoutChunks = captureStdoutWrites();
    const exitSpy = stubProcessExit();

    await runLogsCli(["logs", "--follow", "--url", "ws://remote.example.com:18789"]);

    expect(readConfiguredLogTail).not.toHaveBeenCalled();
    expect(stderrChunks.join("")).toContain("gateway disconnected");
    expect(stdoutChunks.join("")).toContain("line from remote");
    expect(exitSpy).toHaveBeenCalledWith(1);
  });

  it("exits immediately on pairing-required close errors in --follow mode with explicit URL", async () => {
    // Close code 1008 is treated as non-recoverable, so no reconnect warning is expected.
    const pairingRequiredError = new GatewayTransportError({
      kind: "closed",
      code: 1008,
      reason: "pairing required",
      connectionDetails: { url: "ws://127.0.0.1:18789", urlSource: "cli", message: "" },
      message: "gateway closed (1008 policy violation): pairing required",
    });
    callGatewayFromCli.mockRejectedValueOnce(pairingRequiredError);
    const stderrChunks = captureStderrWrites();
    const exitSpy = stubProcessExit();

    await runLogsCli(["logs", "--follow", "--url", "ws://127.0.0.1:18789"]);

    const stderrText = stderrChunks.join("");
    expect(stderrText).not.toContain("gateway disconnected");
    expect(stderrText).toContain("Gateway not reachable");
    expect(exitSpy).toHaveBeenCalledWith(1);
  });

  it("exits immediately on app-defined auth errors (4xxx) in --follow mode with explicit URL", async () => {
    // Close codes in the 4000-4999 range are treated as non-recoverable as well.
    const unauthorizedError = new GatewayTransportError({
      kind: "closed",
      code: 4001,
      reason: "unauthorized",
      connectionDetails: { url: "ws://127.0.0.1:18789", urlSource: "cli", message: "" },
      message: "gateway closed (4001 unauthorized): unauthorized",
    });
    callGatewayFromCli.mockRejectedValueOnce(unauthorizedError);
    const stderrChunks = captureStderrWrites();
    const exitSpy = stubProcessExit();

    await runLogsCli(["logs", "--follow", "--url", "ws://127.0.0.1:18789"]);

    const stderrText = stderrChunks.join("");
    expect(stderrText).not.toContain("gateway disconnected");
    expect(stderrText).toContain("Gateway not reachable");
    expect(exitSpy).toHaveBeenCalledWith(1);
  });
});
it("does not use local fallback for explicit Gateway URLs", async () => {
callGatewayFromCli.mockRejectedValueOnce(
new GatewayTransportError({

View File

@@ -7,6 +7,7 @@ import {
} from "../gateway/call.js";
import { isLoopbackHost } from "../gateway/net.js";
import { readConnectPairingRequiredMessage } from "../gateway/protocol/connect-error-details.js";
import { computeBackoff } from "../infra/backoff.js";
import { formatErrorMessage } from "../infra/errors.js";
import { readConfiguredLogTail } from "../logging/log-tail.js";
import { parseLogLine } from "../logging/parse-log-line.js";
@@ -146,6 +147,29 @@ function isPlainGatewayRequestTimeoutError(message: string): boolean {
return /^gateway timeout after \d+ms\b/u.test(message);
}
/**
 * Upper bound on consecutive reconnect attempts in `--follow` mode. The retry
 * counter is compared against this value before each retry and is reset after
 * every successful fetch.
 */
const MAX_FOLLOW_RETRIES = 8;

/**
 * Policy object passed to `computeBackoff` together with the current retry
 * attempt number to obtain the delay before the next reconnect.
 */
const FOLLOW_BACKOFF_POLICY = {
  initialMs: 1_000,
  maxMs: 30_000,
  factor: 2,
  jitter: 0.2,
};
/**
 * Classifies a failed fetch as transient (worth retrying) or not.
 *
 * Only transport-level disconnects count as transient. Auth errors (4xxx close
 * codes), policy violations (1008) and pairing-required messages need user
 * action, so they return false to keep the caller from looping on them.
 *
 * @param error - The value caught from the failed fetch.
 * @returns true when the error looks like a retryable disconnect or timeout.
 */
function isTransientFollowError(error: unknown): boolean {
  if (!isGatewayTransportError(error)) {
    // Not a structured transport error: classify by its normalized, lowercased message.
    const text = normalizeLowercaseStringOrEmpty(normalizeErrorMessage(error));
    if (readConnectPairingRequiredMessage(text)) {
      return false;
    }
    return isPlainGatewayRequestCloseError(text) || isPlainGatewayRequestTimeoutError(text);
  }

  if (error.kind === "timeout") {
    return true;
  }

  // A missing close code is treated as 0, which falls outside both rejected ranges.
  const closeCode = error.code ?? 0;
  // 1008 = policy violation (pairing required)
  const isPolicyViolation = closeCode === 1008;
  // 4xxx = app-defined (auth, rate-limit)
  const isAppDefined = closeCode >= 4000 && closeCode <= 4999;
  return !isPolicyViolation && !isAppDefined;
}
export function formatLogTimestamp(
value?: string,
mode: "pretty" | "plain" = "plain",
@@ -306,6 +330,7 @@ export function registerLogsCli(program: Command) {
const localTime =
Boolean(opts.localTime) || (!!process.env.TZ && isValidTimeZone(process.env.TZ));
let followRetryAttempt = 0;
while (true) {
let payload: LogsTailPayload;
// Show progress spinner only on first fetch, not during follow polling
@@ -313,6 +338,23 @@ export function registerLogsCli(program: Command) {
try {
payload = await fetchLogs(opts, cursor, showProgress);
} catch (err) {
if (opts.follow && followRetryAttempt < MAX_FOLLOW_RETRIES && isTransientFollowError(err)) {
followRetryAttempt += 1;
const backoffMs = computeBackoff(FOLLOW_BACKOFF_POLICY, followRetryAttempt);
if (
!errorLine(
colorize(
rich,
theme.warn,
`[logs] gateway disconnected, reconnecting in ${Math.round(backoffMs / 1_000)}s...`,
),
)
) {
return;
}
await delay(backoffMs);
continue;
}
await emitGatewayError(
err,
opts,
@@ -324,6 +366,7 @@ export function registerLogsCli(program: Command) {
process.exit(1);
return;
}
followRetryAttempt = 0;
const lines = Array.isArray(payload.lines) ? payload.lines : [];
if (jsonMode) {
if (first) {