mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 14:10:51 +00:00
fix(doctor): use lightweight gateway liveness check
This commit is contained in:
@@ -77,6 +77,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway: expose `gateway.handshakeTimeoutMs` in config, schema, and docs while preserving `OPENCLAW_HANDSHAKE_TIMEOUT_MS` precedence, so loaded or low-powered hosts can tune local WebSocket pre-auth handshakes without patching dist files. Supersedes #51282; refs #73592 and #73652. Thanks @henry-the-frog.
|
||||
- Gateway/TUI/status: align configured and env-based WebSocket handshake budgets across local clients, probes, and fallback RPCs while preserving explicit status timeouts and paired-device auth fallback, so slow local gateways are not marked unreachable by a shorter client watchdog. Refs #73524, #73535, #73592, and #73602. Thanks @harshcatsystems-collab, @DJBlackhawk, and @Vksh07.
|
||||
- Gateway/startup: return retryable `UNAVAILABLE` during the sidecar startup window and keep CLI/TUI/status clients retrying inside their existing timeout budget, so early connects no longer surface as terminal handshake failures. Fixes #73652. Thanks @spenceryang1996-dot.
|
||||
- Doctor/Gateway: use a lightweight `status` RPC that skips channel summary work for the doctor Gateway liveness check, so slow health snapshots no longer falsely trigger the service restart repair. Fixes #64400; supersedes #64511. Thanks @CHE10X and @EronFan.
|
||||
- Agents/auth: scope external CLI credential discovery to configured providers during model auth status and startup prewarm, so opencode-only and other single-provider gateways do not block on unrelated Claude CLI Keychain probes. Fixes #73908. Thanks @Ailuras.
|
||||
- Agents/model selection: resolve slash-form aliases before provider/model parsing and keep alias-resolved primary models subject to transient provider cooldowns, so cron and persisted sessions do not retry cooled-down raw aliases. Fixes #73573 and #73657. Thanks @akai-shuuichi and @hashslingers.
|
||||
- Agents/Claude CLI: reuse already-cached macOS Keychain credentials for no-prompt Claude credential reads, so doctor/runtime checks do not miss fresh interactive Claude auth. Fixes #73682. Thanks @RyanSandoval.
|
||||
|
||||
@@ -14,7 +14,51 @@ vi.mock("./health.js", () => ({
|
||||
healthCommand: vi.fn(),
|
||||
}));
|
||||
|
||||
import { probeGatewayMemoryStatus } from "./doctor-gateway-health.js";
|
||||
import { checkGatewayHealth, probeGatewayMemoryStatus } from "./doctor-gateway-health.js";
|
||||
|
||||
describe("checkGatewayHealth", () => {
  const cfg = {} as OpenClawConfig;
  const livenessTimeoutMs = 3000;

  /** Builds a stub runtime whose log/error/exit calls can be inspected. */
  const createRuntime = () => ({ log: vi.fn(), error: vi.fn(), exit: vi.fn() });

  /** Runs the health check with the shared config and liveness timeout. */
  const runHealthCheck = (runtime: ReturnType<typeof createRuntime>) =>
    checkGatewayHealth({ runtime: runtime as never, cfg, timeoutMs: livenessTimeoutMs });

  beforeEach(() => {
    // Start every test from a clean gateway mock.
    callGateway.mockReset();
  });

  it("uses a lightweight status RPC for the restart liveness gate", async () => {
    // The first queued value answers the liveness `status` call, the second
    // answers the follow-up `channels.status` probe asserted below.
    callGateway.mockResolvedValueOnce({ ok: true });
    callGateway.mockResolvedValueOnce({});
    const runtime = createRuntime();

    const outcome = await runHealthCheck(runtime);

    expect(outcome).toEqual({ healthOk: true });

    const livenessRequest = {
      method: "status",
      params: { includeChannelSummary: false },
      timeoutMs: livenessTimeoutMs,
      config: cfg,
    };
    const channelProbeRequest = {
      method: "channels.status",
      params: { probe: true, timeoutMs: 5000 },
      timeoutMs: 6000,
    };
    expect(callGateway).toHaveBeenNthCalledWith(1, livenessRequest);
    expect(callGateway).toHaveBeenNthCalledWith(2, channelProbeRequest);
    expect(runtime.error).not.toHaveBeenCalled();
  });

  it("does not run follow-up channel probes when liveness fails", async () => {
    const livenessFailure = new Error("gateway timeout after 3000ms");
    callGateway.mockRejectedValueOnce(livenessFailure);
    const runtime = createRuntime();

    const outcome = await runHealthCheck(runtime);

    expect(outcome).toEqual({ healthOk: false });
    // Only the failed liveness call may have reached the gateway.
    expect(callGateway).toHaveBeenCalledTimes(1);

    const expectedMessage = expect.stringContaining(
      "Health check failed: Error: gateway timeout after 3000ms",
    );
    expect(runtime.error).toHaveBeenCalledWith(expectedMessage);
  });
});
|
||||
|
||||
describe("probeGatewayMemoryStatus", () => {
|
||||
const cfg = {} as OpenClawConfig;
|
||||
|
||||
@@ -6,7 +6,6 @@ import { formatErrorMessage } from "../infra/errors.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
import { note } from "../terminal/note.js";
|
||||
import { formatHealthCheckFailure } from "./health-format.js";
|
||||
import { healthCommand } from "./health.js";
|
||||
|
||||
export type GatewayMemoryProbe = {
|
||||
checked: boolean;
|
||||
@@ -28,7 +27,12 @@ export async function checkGatewayHealth(params: {
|
||||
typeof params.timeoutMs === "number" && params.timeoutMs > 0 ? params.timeoutMs : 10_000;
|
||||
let healthOk = false;
|
||||
try {
|
||||
await healthCommand({ json: false, timeoutMs, config: params.cfg }, params.runtime);
|
||||
await callGateway({
|
||||
method: "status",
|
||||
params: { includeChannelSummary: false },
|
||||
timeoutMs,
|
||||
config: params.cfg,
|
||||
});
|
||||
healthOk = true;
|
||||
} catch (err) {
|
||||
const message = String(err);
|
||||
|
||||
@@ -29,10 +29,11 @@ export const healthHandlers: GatewayRequestHandlers = {
|
||||
respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err)));
|
||||
}
|
||||
},
|
||||
status: async ({ respond, client }) => {
|
||||
// Responds with the status summary. Sensitive fields are requested only when
// the client's scopes include ADMIN_SCOPE, and channel summary work is
// requested unless the request passes `includeChannelSummary: false`.
status: async ({ respond, client, params }) => {
  // A missing or non-array scope list is treated as "no scopes".
  const grantedScopes = client?.connect?.scopes;
  const scopes = Array.isArray(grantedScopes) ? grantedScopes : [];
  const includeSensitive = scopes.includes(ADMIN_SCOPE);
  // Only an explicit `false` opts out; any other value keeps the channel summary.
  const includeChannelSummary = params.includeChannelSummary !== false;
  const summary = await getStatusSummary({ includeSensitive, includeChannelSummary });
  respond(true, summary, undefined);
},
|
||||
|
||||
@@ -1893,10 +1893,32 @@ describe("gateway healthHandlers.status scope handling", () => {
|
||||
async ({ scopes, includeSensitive }) => {
|
||||
const respond = await runHealthStatus(scopes);
|
||||
|
||||
expect(vi.mocked(statusModule.getStatusSummary)).toHaveBeenCalledWith({ includeSensitive });
|
||||
expect(vi.mocked(statusModule.getStatusSummary)).toHaveBeenCalledWith({
|
||||
includeSensitive,
|
||||
includeChannelSummary: true,
|
||||
});
|
||||
expect(respond).toHaveBeenCalledWith(true, { ok: true }, undefined);
|
||||
},
|
||||
);
|
||||
|
||||
it("can skip channel summary work for liveness-only status requests", async () => {
  const respond = vi.fn();
  const getStatusSummaryMock = vi.mocked(statusModule.getStatusSummary);
  // Operator client holding only the read scope; the assertion below expects
  // this not to request sensitive fields.
  const readOnlyOperator = { connect: { role: "operator", scopes: ["operator.read"] } };
  const livenessOnlyParams = { includeChannelSummary: false };

  await healthHandlers.status({
    req: {} as never,
    params: livenessOnlyParams,
    respond: respond as never,
    context: {} as never,
    client: readOnlyOperator as never,
    isWebchatConnect: () => false,
  });

  const expectedSummaryOptions = {
    includeSensitive: false,
    includeChannelSummary: false,
  };
  expect(getStatusSummaryMock).toHaveBeenCalledWith(expectedSummaryOptions);
  // NOTE(review): `{ ok: true }` is presumably what the mocked getStatusSummary
  // returns; the mock setup is outside this view, so confirm.
  expect(respond).toHaveBeenCalledWith(true, { ok: true }, undefined);
});
|
||||
});
|
||||
|
||||
describe("logs.tail", () => {
|
||||
|
||||
Reference in New Issue
Block a user