refactor(gateway): classify gateway transport failures

# Conflicts:
#	CHANGELOG.md
This commit is contained in:
Peter Steinberger
2026-04-29 08:10:01 +01:00
parent e25b542100
commit 023d3371a5
5 changed files with 233 additions and 19 deletions

View File

@@ -15,7 +15,6 @@ Docs: https://docs.openclaw.ai
### Fixes
- MCP/plugins: stringify non-array plugin tool results with chat-content coercion instead of default object stringification, so MCP callers receive useful JSON/text content from plugin tools. Thanks @vincentkoc.
- CLI/logs: fall back to the configured Gateway file log when implicit loopback Gateway connections close or time out before or during `logs.tail`, so `openclaw logs` still works while diagnosing local-model Gateway disconnects. Refs #74078. Thanks @sakalaboator.
- MCP/plugins: stringify non-array plugin tool results with chat-content coercion instead of default object stringification, so MCP callers receive useful JSON/text content from plugin tools. Thanks @vincentkoc.
- Channels/Discord: remove Discord-owned queued-run timeout replies through the shared channel lifecycle queue while preserving message ordering and compatibility timeout constants, so long Discord turns stay governed by session/tool/runtime lifecycle instead of channel fallback errors. Thanks @codexGW.

View File

@@ -1,7 +1,41 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { GatewayTransportError } from "../gateway/call.js";
import { runRegisteredCli } from "../test-utils/command-runner.js";
import { formatLogTimestamp, registerLogsCli } from "./logs-cli.js";
const { MockGatewayTransportError } = vi.hoisted(() => ({
MockGatewayTransportError: class extends Error {
readonly kind: string;
readonly connectionDetails: unknown;
readonly code?: number;
readonly reason?: string;
readonly timeoutMs?: number;
constructor(params: {
kind: string;
message: string;
connectionDetails: unknown;
code?: number;
reason?: string;
timeoutMs?: number;
}) {
super(params.message);
this.name = "GatewayTransportError";
this.kind = params.kind;
this.connectionDetails = params.connectionDetails;
if (params.code !== undefined) {
this.code = params.code;
}
if (params.reason !== undefined) {
this.reason = params.reason;
}
if (params.timeoutMs !== undefined) {
this.timeoutMs = params.timeoutMs;
}
}
},
}));
const callGatewayFromCli = vi.fn();
const readConfiguredLogTail = vi.fn();
const buildGatewayConnectionDetails = vi.fn(
@@ -18,9 +52,11 @@ const buildGatewayConnectionDetails = vi.fn(
);
vi.mock("../gateway/call.js", () => ({
GatewayTransportError: MockGatewayTransportError,
buildGatewayConnectionDetails: (
...args: Parameters<typeof import("../gateway/call.js").buildGatewayConnectionDetails>
) => buildGatewayConnectionDetails(...args),
isGatewayTransportError: (value: unknown) => value instanceof MockGatewayTransportError,
}));
vi.mock("../logging/log-tail.js", () => ({
@@ -155,7 +191,7 @@ describe("logs cli", () => {
maxBytes: 250_000,
});
expect(stdoutWrites.join("")).toContain("local fallback line");
expect(stderrWrites.join("")).toContain("reading local log file instead");
expect(stderrWrites.join("")).toContain("Local Gateway RPC unavailable");
});
it("falls back to the local log file on loopback scope-upgrade errors", async () => {
@@ -178,12 +214,22 @@ describe("logs cli", () => {
expect(readConfiguredLogTail).toHaveBeenCalledTimes(1);
expect(stdoutWrites.join("")).toContain("local fallback line");
expect(stderrWrites.join("")).toContain("reading local log file instead");
expect(stderrWrites.join("")).toContain("Local Gateway RPC unavailable");
});
it("falls back to the configured Gateway file log on loopback gateway close errors", async () => {
callGatewayFromCli.mockRejectedValueOnce(
new Error("gateway closed (1000 normal closure): no close reason"),
new GatewayTransportError({
kind: "closed",
code: 1000,
reason: "no close reason",
connectionDetails: {
url: "ws://127.0.0.1:18789",
urlSource: "local loopback",
message: "",
},
message: "gateway closed (1000 normal closure): no close reason",
}),
);
readConfiguredLogTail.mockResolvedValueOnce({
file: "/tmp/openclaw.log",
@@ -204,9 +250,40 @@ describe("logs cli", () => {
expect(stderrWrites.join("")).toContain("Local Gateway RPC unavailable");
});
it("falls back to the configured Gateway file log on post-handshake plain close errors", async () => {
callGatewayFromCli.mockRejectedValueOnce(new Error("gateway closed (1006): abnormal closure"));
readConfiguredLogTail.mockResolvedValueOnce({
file: "/tmp/openclaw.log",
cursor: 5,
size: 5,
lines: ["local fallback line"],
truncated: false,
reset: false,
});
const stdoutWrites = captureStdoutWrites();
const stderrWrites = captureStderrWrites();
await runLogsCli(["logs"]);
expect(readConfiguredLogTail).toHaveBeenCalledTimes(1);
expect(stdoutWrites.join("")).toContain("local fallback line");
expect(stderrWrites.join("")).toContain("Local Gateway RPC unavailable");
});
it("does not use local fallback for explicit Gateway URLs", async () => {
callGatewayFromCli.mockRejectedValueOnce(
new Error("gateway closed (1000 normal closure): no close reason"),
new GatewayTransportError({
kind: "closed",
code: 1000,
reason: "no close reason",
connectionDetails: {
url: "ws://127.0.0.1:18789",
urlSource: "local loopback",
message: "",
},
message: "gateway closed (1000 normal closure): no close reason",
}),
);
const stdoutWrites = captureStdoutWrites();

View File

@@ -1,6 +1,10 @@
import { setTimeout as delay } from "node:timers/promises";
import type { Command } from "commander";
import { buildGatewayConnectionDetails } from "../gateway/call.js";
import {
buildGatewayConnectionDetails,
isGatewayTransportError,
type GatewayConnectionDetails,
} from "../gateway/call.js";
import { isLoopbackHost } from "../gateway/net.js";
import { readConnectPairingRequiredMessage } from "../gateway/protocol/connect-error-details.js";
import { formatErrorMessage } from "../infra/errors.js";
@@ -97,14 +101,19 @@ function normalizeErrorMessage(error: unknown): string {
}
function shouldUseLocalLogsFallback(opts: LogsCliOptions, error: unknown): boolean {
const message = normalizeLowercaseStringOrEmpty(normalizeErrorMessage(error));
if (!isLocalGatewayRpcUnavailableError(message)) {
if (!isLocalGatewayRpcUnavailableError(error)) {
return false;
}
if (typeof opts.url === "string" && opts.url.trim().length > 0) {
return false;
}
const connection = buildGatewayConnectionDetails();
const connection = isGatewayTransportError(error)
? error.connectionDetails
: buildGatewayConnectionDetails();
return isImplicitLoopbackGatewayConnection(connection);
}
function isImplicitLoopbackGatewayConnection(connection: GatewayConnectionDetails): boolean {
if (connection.urlSource !== "local loopback") {
return false;
}
@@ -115,15 +124,24 @@ function shouldUseLocalLogsFallback(opts: LogsCliOptions, error: unknown): boole
}
}
function isLocalGatewayRpcUnavailableError(message: string): boolean {
function isLocalGatewayRpcUnavailableError(error: unknown): boolean {
if (isGatewayTransportError(error)) {
return error.kind === "closed" || error.kind === "timeout";
}
const message = normalizeLowercaseStringOrEmpty(normalizeErrorMessage(error));
if (readConnectPairingRequiredMessage(message)) {
return true;
}
return (
message.includes("gateway closed (") ||
message.includes("gateway timeout after") ||
message.includes("gateway connect failed:")
);
// GatewayClient pending request failures are still plain Error instances.
return isPlainGatewayRequestCloseError(message) || isPlainGatewayRequestTimeoutError(message);
}
function isPlainGatewayRequestCloseError(message: string): boolean {
return message.startsWith("gateway closed (");
}
function isPlainGatewayRequestTimeoutError(message: string): boolean {
return /^gateway timeout after \d+ms\b/u.test(message);
}
export function formatLogTimestamp(

View File

@@ -95,8 +95,14 @@ vi.mock("./client.js", () => ({
},
}));
const { __testing, buildGatewayConnectionDetails, callGateway, callGatewayCli, callGatewayScoped } =
await import("./call.js");
const {
__testing,
buildGatewayConnectionDetails,
callGateway,
callGatewayCli,
callGatewayScoped,
isGatewayTransportError,
} = await import("./call.js");
class StubGatewayClient {
constructor(opts: {
@@ -820,6 +826,13 @@ describe("callGateway error details", () => {
expect(err?.message).toContain("Gateway target: ws://127.0.0.1:18789");
expect(err?.message).toContain("Source: local loopback");
expect(err?.message).toContain("Bind: loopback");
expect(isGatewayTransportError(err)).toBe(true);
expect(err).toMatchObject({
name: "GatewayTransportError",
kind: "closed",
code: 1006,
reason: "no close reason",
});
});
it("includes connection details on timeout", async () => {
@@ -841,6 +854,27 @@ describe("callGateway error details", () => {
expect(errMessage).toContain("Bind: loopback");
});
it("marks wrapper timeouts as typed gateway transport errors", async () => {
startMode = "silent";
setLocalLoopbackGatewayConfig();
vi.useFakeTimers();
let err: unknown;
const promise = callGateway({ method: "health", timeoutMs: 5 }).catch((caught) => {
err = caught;
});
await vi.advanceTimersByTimeAsync(5);
await promise;
expect(isGatewayTransportError(err)).toBe(true);
expect(err).toMatchObject({
name: "GatewayTransportError",
kind: "timeout",
timeoutMs: 5,
});
});
it("keeps the default wrapper timeout aligned with configured handshake timeout", async () => {
startMode = "silent";
getRuntimeConfig.mockReturnValue({

View File

@@ -82,6 +82,54 @@ export type CallGatewayOptions = CallGatewayBaseOptions & {
scopes?: OperatorScope[];
};
export type GatewayTransportErrorKind = "closed" | "timeout";
export class GatewayTransportError extends Error {
readonly kind: GatewayTransportErrorKind;
readonly connectionDetails: GatewayConnectionDetails;
readonly code?: number;
readonly reason?: string;
readonly timeoutMs?: number;
constructor(params: {
kind: GatewayTransportErrorKind;
message: string;
connectionDetails: GatewayConnectionDetails;
code?: number;
reason?: string;
timeoutMs?: number;
}) {
super(params.message);
this.name = "GatewayTransportError";
this.kind = params.kind;
this.connectionDetails = params.connectionDetails;
if (params.code !== undefined) {
this.code = params.code;
}
if (params.reason !== undefined) {
this.reason = params.reason;
}
if (params.timeoutMs !== undefined) {
this.timeoutMs = params.timeoutMs;
}
}
}
export function isGatewayTransportError(value: unknown): value is GatewayTransportError {
if (value instanceof GatewayTransportError) {
return true;
}
if (!(value instanceof Error) || value.name !== "GatewayTransportError") {
return false;
}
const candidate = value as Partial<GatewayTransportError>;
return (
(candidate.kind === "closed" || candidate.kind === "timeout") &&
typeof candidate.connectionDetails === "object" &&
candidate.connectionDetails !== null
);
}
const defaultCreateGatewayClient = (opts: GatewayClientOptions) => new GatewayClient(opts);
const defaultGatewayCallDeps = {
createGatewayClient: defaultCreateGatewayClient,
@@ -488,6 +536,33 @@ function formatGatewayTimeoutError(
return `gateway timeout after ${timeoutMs}ms\n${connectionDetails.message}`;
}
function createGatewayCloseTransportError(params: {
code: number;
reason: string;
connectionDetails: GatewayConnectionDetails;
}): GatewayTransportError {
const reasonText = normalizeOptionalString(params.reason) || "no close reason";
return new GatewayTransportError({
kind: "closed",
code: params.code,
reason: reasonText,
connectionDetails: params.connectionDetails,
message: formatGatewayCloseError(params.code, params.reason, params.connectionDetails),
});
}
function createGatewayTimeoutTransportError(params: {
timeoutMs: number;
connectionDetails: GatewayConnectionDetails;
}): GatewayTransportError {
return new GatewayTransportError({
kind: "timeout",
timeoutMs: params.timeoutMs,
connectionDetails: params.connectionDetails,
message: formatGatewayTimeoutError(params.timeoutMs, params.connectionDetails),
});
}
function ensureGatewaySupportsRequiredMethods(params: {
requiredMethods: string[] | undefined;
methods: string[] | undefined;
@@ -606,13 +681,24 @@ async function executeGatewayRequestWithScopes<T>(params: {
return;
}
ignoreClose = true;
stop(new Error(formatGatewayCloseError(code, reason, params.connectionDetails)));
stop(
createGatewayCloseTransportError({
code,
reason,
connectionDetails: params.connectionDetails,
}),
);
},
});
const timer = setTimeout(() => {
ignoreClose = true;
stop(new Error(formatGatewayTimeoutError(timeoutMs, params.connectionDetails)));
stop(
createGatewayTimeoutTransportError({
timeoutMs,
connectionDetails: params.connectionDetails,
}),
);
}, safeTimerTimeoutMs);
client.start();