Doctor: expose gateway runtime findings (#97075)

* feat(doctor): expose gateway runtime findings

* fix(doctor): redact gateway health targets
This commit is contained in:
Gio Della-Libera
2026-06-27 17:17:49 -07:00
committed by GitHub
parent 7bbd09047b
commit 3630d502eb
5 changed files with 395 additions and 3 deletions

View File

@@ -9,6 +9,10 @@ const mocks = vi.hoisted(() => ({
disposeBundleRuntime: vi.fn(),
loadModelCatalog: vi.fn(async (): Promise<Array<Record<string, unknown>>> => []),
normalizeProviderToolSchemasWithPlugin: vi.fn(),
buildGatewayProbeConnectionDetails: vi.fn(),
probeGatewayStatus: vi.fn(),
readGatewayServiceState: vi.fn(),
resolveGatewayService: vi.fn(() => ({ label: "openclaw-gateway" })),
resolvePluginProviders: vi.fn((): Array<Record<string, unknown>> => []),
resolveDefaultModelForAgent: vi.fn(() => ({ provider: "openai", model: "gpt-5.5" })),
}));
@@ -35,6 +39,19 @@ vi.mock("../agents/agent-tools.js", () => ({
createOpenClawCodingTools: mocks.createOpenClawCodingTools,
}));
// Route the Gateway connection-detail builder to the hoisted mock so each
// test can choose the URL that the health probe will target.
vi.mock("../gateway/call.js", () => ({
buildGatewayProbeConnectionDetails: mocks.buildGatewayProbeConnectionDetails,
}));
// Route the Gateway status probe to the hoisted mock so each test can choose
// the probe result.
vi.mock("../cli/daemon-cli/probe.js", () => ({
probeGatewayStatus: mocks.probeGatewayStatus,
}));
// Route daemon service resolution and service-state reads to the hoisted
// mocks so each test can choose the reported installed/loaded/running state.
vi.mock("../daemon/service.js", () => ({
readGatewayServiceState: mocks.readGatewayServiceState,
resolveGatewayService: mocks.resolveGatewayService,
}));
vi.mock("../plugins/provider-runtime.js", () => ({
inspectProviderToolSchemasWithPlugin: () => [],
normalizeProviderToolSchemasWithPlugin: mocks.normalizeProviderToolSchemasWithPlugin,
@@ -48,8 +65,12 @@ vi.mock("../plugins/providers.runtime.js", () => ({
resolvePluginProviders: mocks.resolvePluginProviders,
}));
const { collectProviderCatalogProjectionFindings, collectRuntimeToolSchemaFindings } =
await import("./doctor-core-checks.runtime.js");
const {
collectGatewayDaemonFindings,
collectGatewayHealthFindings,
collectProviderCatalogProjectionFindings,
collectRuntimeToolSchemaFindings,
} = await import("./doctor-core-checks.runtime.js");
function tool(name: string, parameters: unknown): AnyAgentTool {
return {
@@ -79,6 +100,22 @@ describe("doctor runtime tool schema checks", () => {
mocks.normalizeProviderToolSchemasWithPlugin
.mockReset()
.mockImplementation(({ context }) => context.tools);
mocks.buildGatewayProbeConnectionDetails.mockReset().mockResolvedValue({
url: "http://127.0.0.1:5829",
});
mocks.probeGatewayStatus.mockReset().mockResolvedValue({
ok: true,
server: { version: "2026.6.26" },
});
mocks.readGatewayServiceState.mockReset().mockResolvedValue({
installed: true,
loaded: true,
running: true,
env: {},
command: { programArguments: ["openclaw", "gateway"], sourcePath: "/tmp/gateway.service" },
runtime: { status: "running" },
});
mocks.resolveGatewayService.mockClear();
mocks.resolvePluginProviders.mockReset().mockReturnValue([]);
mocks.resolveDefaultModelForAgent.mockClear();
});
@@ -503,6 +540,100 @@ describe("doctor runtime tool schema checks", () => {
});
});
describe("doctor gateway runtime checks", () => {
  // Baseline values shared by the default mock setup below.
  const LOCAL_GATEWAY_URL = "http://127.0.0.1:5829";
  const SERVICE_LABEL = "openclaw-gateway";

  beforeEach(() => {
    // Every test starts from a reachable gateway and an installed, loaded,
    // running service; individual tests override a single mock with *Once.
    mocks.buildGatewayProbeConnectionDetails.mockReset();
    mocks.buildGatewayProbeConnectionDetails.mockResolvedValue({ url: LOCAL_GATEWAY_URL });

    mocks.probeGatewayStatus.mockReset();
    mocks.probeGatewayStatus.mockResolvedValue({
      ok: true,
      server: { version: "2026.6.26" },
    });

    mocks.readGatewayServiceState.mockReset();
    mocks.readGatewayServiceState.mockResolvedValue({
      installed: true,
      loaded: true,
      running: true,
      env: {},
      command: { programArguments: ["openclaw", "gateway"], sourcePath: "/tmp/gateway.service" },
      runtime: { status: "running" },
    });

    mocks.resolveGatewayService.mockReset();
    mocks.resolveGatewayService.mockReturnValue({ label: SERVICE_LABEL });
  });

  it("reports unreachable gateway health probes", async () => {
    mocks.probeGatewayStatus.mockResolvedValueOnce({
      ok: false,
      error: "connect ECONNREFUSED 127.0.0.1:5829",
    });

    const findings = await collectGatewayHealthFindings({
      cfg: { gateway: { mode: "local" } },
    });

    expect(findings).toContainEqual({
      checkId: "core/doctor/gateway-health",
      severity: "warning",
      message: "Gateway is not reachable: connect ECONNREFUSED 127.0.0.1:5829",
      path: "gateway.mode",
      target: "http://127.0.0.1:5829",
      fixHint:
        "Start the Gateway service or run `openclaw doctor --fix` for service repair prompts.",
    });
  });

  it("redacts sensitive remote gateway URLs from health finding targets", async () => {
    mocks.buildGatewayProbeConnectionDetails.mockResolvedValueOnce({
      url: "wss://user:pass@gateway.example.test/rpc?token=secret&safe=value",
    });
    mocks.probeGatewayStatus.mockResolvedValueOnce({
      ok: false,
      error: "remote gateway did not answer",
    });

    const findings = await collectGatewayHealthFindings({
      cfg: { gateway: { mode: "remote", remote: { url: "wss://gateway.example.test/rpc" } } },
    });

    expect(findings).toContainEqual({
      checkId: "core/doctor/gateway-health",
      severity: "warning",
      message: "Gateway is not reachable: remote gateway did not answer",
      path: "gateway.remote.url",
      target: "wss://***:***@gateway.example.test/rpc?token=***&safe=value",
      fixHint: "Verify the remote Gateway URL, network path, TLS settings, and credentials.",
    });

    // Neither the userinfo nor the token value may appear anywhere in the findings.
    const serialized = JSON.stringify(findings);
    expect(serialized).not.toContain("user:pass");
    expect(serialized).not.toContain("token=secret");
  });

  it("reports missing local gateway daemon service", async () => {
    mocks.readGatewayServiceState.mockResolvedValueOnce({
      installed: false,
      loaded: false,
      running: false,
      env: {},
      command: null,
    });

    const findings = await collectGatewayDaemonFindings({
      cfg: { gateway: { mode: "local" } },
    });

    expect(findings).toContainEqual({
      checkId: "core/doctor/gateway-daemon",
      severity: "warning",
      message: "Gateway service is not installed.",
      path: "gateway.mode",
      target: "openclaw-gateway",
      fixHint: "Run `openclaw doctor --fix` or `openclaw gateway install` to install it.",
    });
  });

  it("skips daemon findings for remote gateway mode", async () => {
    const findings = await collectGatewayDaemonFindings({
      cfg: { gateway: { mode: "remote" } },
    });

    expect(findings).toEqual([]);
    // Remote mode must return before the local service state is ever read.
    expect(mocks.readGatewayServiceState).not.toHaveBeenCalled();
  });
});
describe("doctor provider catalog projection checks", () => {
beforeEach(() => {
mocks.resolvePluginProviders.mockReset().mockReturnValue([]);

View File

@@ -1,4 +1,5 @@
// Doctor runtime checks inspect tool names, browser residue, and runtime state.
import { redactSensitiveUrlLikeString } from "@openclaw/net-policy/redact-sensitive-url";
import { TOOL_NAME_SEPARATOR } from "../agents/agent-bundle-mcp-names.js";
import {
type McpToolCatalogDiagnostic,
@@ -30,20 +31,32 @@ import {
type RuntimeToolSchemaDiagnostic,
} from "../agents/tool-schema-projection.js";
import type { AnyAgentTool } from "../agents/tools/common.js";
import { probeGatewayStatus } from "../cli/daemon-cli/probe.js";
import { collectUnavailableAgentSkills } from "../commands/doctor-skills-core.js";
import { gatewayProbeResultSawGateway } from "../commands/gateway-health-auth-diagnostic.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import {
getSystemdCgroupHygieneSummary,
type GatewayServiceRuntime,
} from "../daemon/service-runtime.js";
import { resolveGatewayService, readGatewayServiceState } from "../daemon/service.js";
import { buildGatewayProbeConnectionDetails } from "../gateway/call.js";
import { formatErrorMessage } from "../infra/errors.js";
import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.types.js";
import { getPluginToolMeta, setPluginToolMeta } from "../plugins/tools.js";
import type { ProviderCatalogOrder, ProviderPlugin } from "../plugins/types.js";
import { normalizeAgentId } from "../routing/session-key.js";
import { buildWorkspaceSkillStatus, type SkillStatusEntry } from "../skills/discovery/status.js";
import type { HealthFinding } from "./health-checks.js";
import type { HealthCheckContext, HealthFinding } from "./health-checks.js";
type BundleMcpToolRuntime = Awaited<ReturnType<typeof createBundleMcpToolRuntime>>;
const PROVIDER_CATALOG_ORDERS = ["simple", "profile", "paired", "late"] as const;
const PROVIDER_CATALOG_ORDER_SET = new Set<ProviderCatalogOrder>(PROVIDER_CATALOG_ORDERS);
/**
 * Produces the `target` string for a Gateway health finding.
 *
 * The probe URL is passed through `redactSensitiveUrlLikeString` before it is
 * placed in a finding; the redaction rules live in that helper.
 *
 * @param url - Probe URL as returned by the connection-detail builder.
 * @returns The URL after redaction.
 */
function formatGatewayHealthTarget(url: string): string {
  const redactedUrl = redactSensitiveUrlLikeString(url);
  return redactedUrl;
}
export function detectUnavailableSkills(cfg: OpenClawConfig): SkillStatusEntry[] {
const agentId = resolveDefaultAgentId(cfg);
const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
@@ -54,6 +67,136 @@ export function detectUnavailableSkills(cfg: OpenClawConfig): SkillStatusEntry[]
return collectUnavailableAgentSkills(report);
}
/**
 * Probes the configured Gateway and reports an unreachable Gateway as a
 * structured `core/doctor/gateway-health` warning.
 *
 * Flow:
 * 1. Build the probe connection details from the config. If that throws, a
 *    single "could not be prepared" warning is returned instead of probing.
 * 2. Probe the Gateway with a fixed 3000 ms timeout.
 * 3. If `gatewayProbeResultSawGateway` accepts the probe result, return no
 *    findings; otherwise return one "not reachable" warning whose `target` is
 *    the redacted probe URL.
 *
 * @param ctx - Health check context; only `cfg` and the optional `configPath` are read.
 * @returns An empty list, or exactly one warning finding.
 */
export async function collectGatewayHealthFindings(
  ctx: Pick<HealthCheckContext, "cfg" | "configPath">,
): Promise<readonly HealthFinding[]> {
  // Evaluated lazily so the mode is read at the point each finding is built.
  const isRemoteMode = (): boolean => ctx.cfg.gateway?.mode === "remote";

  let connection: Awaited<ReturnType<typeof buildGatewayProbeConnectionDetails>>;
  try {
    // Only forward `configPath` when the context actually carries one.
    const configPathOption = ctx.configPath ? { configPath: ctx.configPath } : {};
    connection = await buildGatewayProbeConnectionDetails({
      config: ctx.cfg,
      ...configPathOption,
    });
  } catch (error) {
    const preparationFailure: HealthFinding = {
      checkId: "core/doctor/gateway-health",
      severity: "warning",
      message: `Gateway health probe could not be prepared: ${formatErrorMessage(error)}`,
      path: isRemoteMode() ? "gateway.remote.url" : "gateway",
      fixHint:
        "Fix Gateway connection configuration, then rerun `openclaw doctor --lint --only core/doctor/gateway-health`.",
    };
    return [preparationFailure];
  }

  const probe = await probeGatewayStatus({
    url: connection.url,
    timeoutMs: 3000,
    tlsFingerprint: connection.tlsFingerprint,
    preauthHandshakeTimeoutMs: connection.preauthHandshakeTimeoutMs,
    config: ctx.cfg,
    json: true,
  });
  if (gatewayProbeResultSawGateway(probe)) {
    return [];
  }

  // Remote and local gateways point at different config keys and remedies.
  let path: string;
  let fixHint: string;
  if (isRemoteMode()) {
    path = "gateway.remote.url";
    fixHint = "Verify the remote Gateway URL, network path, TLS settings, and credentials.";
  } else {
    path = "gateway.mode";
    fixHint =
      "Start the Gateway service or run `openclaw doctor --fix` for service repair prompts.";
  }

  const unreachable: HealthFinding = {
    checkId: "core/doctor/gateway-health",
    severity: "warning",
    message: `Gateway is not reachable: ${probe.error ?? "status probe failed"}`,
    path,
    target: formatGatewayHealthTarget(connection.url),
    fixHint,
  };
  return [unreachable];
}
/**
 * Picks the first defined runtime descriptor, preferring `status`, then
 * `state`, then `subState`.
 *
 * @param runtime - Service runtime snapshot, if one is available.
 * @returns The chosen descriptor, or `undefined` when there is no runtime or
 *   none of the three fields is set.
 */
function gatewayRuntimeStatus(runtime: GatewayServiceRuntime | undefined): string | undefined {
  if (runtime == null) {
    return undefined;
  }
  return runtime.status ?? runtime.state ?? runtime.subState;
}
/**
 * Inspects the local Gateway daemon service and reports problems as
 * structured `core/doctor/gateway-daemon` warnings.
 *
 * Remote gateway mode returns no findings and never reads the service state.
 * A service that is not installed yields a single finding and stops there.
 * Otherwise each of the following conditions adds one finding: not loaded,
 * loaded but not running, missing GUI session, missing supervision/unit
 * metadata, and a non-empty systemd cgroup hygiene summary.
 *
 * @param ctx - Health check context; only `cfg` is read.
 * @returns The warnings collected for the local service (possibly empty).
 */
export async function collectGatewayDaemonFindings(
  ctx: Pick<HealthCheckContext, "cfg">,
): Promise<readonly HealthFinding[]> {
  if (ctx.cfg.gateway?.mode === "remote") {
    return [];
  }

  const service = resolveGatewayService();
  const state = await readGatewayServiceState(service, { env: process.env });

  // All daemon findings share the check id, severity, and service label target.
  const daemonWarning = (
    details: Pick<HealthFinding, "message" | "path" | "fixHint">,
  ): HealthFinding => ({
    checkId: "core/doctor/gateway-daemon",
    severity: "warning",
    message: details.message,
    path: details.path,
    target: service.label,
    fixHint: details.fixHint,
  });

  if (!state.installed) {
    // Nothing else can be inspected for a service that is not installed.
    return [
      daemonWarning({
        message: "Gateway service is not installed.",
        path: "gateway.mode",
        fixHint: "Run `openclaw doctor --fix` or `openclaw gateway install` to install it.",
      }),
    ];
  }

  const unitPath = state.command?.sourcePath;
  const runtime = state.runtime;
  const findings: HealthFinding[] = [];

  if (!state.loaded) {
    findings.push(
      daemonWarning({
        message: "Gateway service is installed but not loaded.",
        path: unitPath,
        fixHint: "Run `openclaw doctor --fix` or `openclaw gateway start` to load it.",
      }),
    );
  } else if (!state.running) {
    const status = gatewayRuntimeStatus(runtime);
    findings.push(
      daemonWarning({
        message: status
          ? `Gateway service runtime is ${status}, not running.`
          : "Gateway service is loaded but runtime status could not confirm it is running.",
        path: unitPath,
        fixHint:
          "Run `openclaw gateway status --deep` or `openclaw doctor --fix` for repair hints.",
      }),
    );
  }

  if (runtime?.missingGuiSession) {
    findings.push(
      daemonWarning({
        message: "Gateway service cannot attach to the user GUI session.",
        path: unitPath,
        fixHint: runtime.detail ?? "Log into a GUI session, then rerun doctor.",
      }),
    );
  }

  if (runtime?.missingSupervision || runtime?.missingUnit) {
    findings.push(
      daemonWarning({
        message: "Gateway service supervision metadata is missing.",
        path: unitPath,
        fixHint: runtime.detail ?? "Reinstall or reload the Gateway service.",
      }),
    );
  }

  const hygiene = getSystemdCgroupHygieneSummary(runtime?.systemd);
  if (hygiene) {
    findings.push(
      daemonWarning({
        message: `Gateway systemd service has risky ${hygiene}.`,
        path: unitPath,
        fixHint: "Repair the systemd unit so stale child processes are cleaned up reliably.",
      }),
    );
  }

  return findings;
}
/**
 * Builds the `plugins.entries.<pluginId>` config path for a plugin.
 *
 * @param pluginId - Plugin identifier, if known.
 * @returns The config path, or `undefined` when `pluginId` is missing or empty.
 */
function providerCatalogPath(pluginId: string | undefined): string | undefined {
  if (!pluginId) {
    return undefined;
  }
  return `plugins.entries.${pluginId}`;
}

View File

@@ -95,6 +95,12 @@ function createDeps(overrides: Partial<CoreHealthCheckDeps> = {}): CoreHealthChe
async collectProviderCatalogProjectionFindings() {
return [];
},
async collectGatewayHealthFindings() {
return [];
},
async collectGatewayDaemonFindings() {
return [];
},
...overrides,
};
}
@@ -231,6 +237,64 @@ describe("CORE_HEALTH_CHECKS", () => {
);
});
it("exposes gateway health findings as an opt-in structured check", async () => {
  const findings: HealthFinding[] = [
    {
      checkId: "core/doctor/gateway-health",
      severity: "warning",
      message: "Gateway is not reachable.",
    },
  ];
  const collectGatewayHealthFindings = vi.fn(async () => findings);
  const checks = createCoreHealthChecks(createDeps({ collectGatewayHealthFindings }));
  const check = getCheck(checks, "core/doctor/gateway-health");
  const ctx = {
    mode: "lint" as const,
    runtime,
    cfg: { gateway: { mode: "local" as const } },
  };

  const detected = await check.detect(ctx);

  // The check must hand back the collector's array itself and pass the context through.
  expect(detected).toBe(findings);
  expect(collectGatewayHealthFindings).toHaveBeenCalledWith(ctx);
  // Opt-in: the check is registered but disabled by default.
  const { defaultEnabled } = check as { defaultEnabled?: boolean };
  expect(defaultEnabled).toBe(false);
});
it("exposes gateway daemon findings as an opt-in structured check", async () => {
  const findings: HealthFinding[] = [
    {
      checkId: "core/doctor/gateway-daemon",
      severity: "warning",
      message: "Gateway service is not installed.",
    },
  ];
  const collectGatewayDaemonFindings = vi.fn(async () => findings);
  const checks = createCoreHealthChecks(createDeps({ collectGatewayDaemonFindings }));
  const check = getCheck(checks, "core/doctor/gateway-daemon");
  const ctx = {
    mode: "lint" as const,
    runtime,
    cfg: { gateway: { mode: "local" as const } },
  };

  const detected = await check.detect(ctx);

  // The check must hand back the collector's array itself and pass the context through.
  expect(detected).toBe(findings);
  expect(collectGatewayDaemonFindings).toHaveBeenCalledWith(ctx);
  // Opt-in: the check is registered but disabled by default.
  const { defaultEnabled } = check as { defaultEnabled?: boolean };
  expect(defaultEnabled).toBe(false);
});
it("converts unavailable skills into repair-capable health findings", async () => {
const unavailableSkill = createSkill();
const cfg: OpenClawConfig = {

View File

@@ -47,6 +47,8 @@ import type {
const BROWSER_CLAWD_PROFILE_RESIDUE_CHECK_ID = "core/doctor/browser-clawd-profile-residue";
const CODEX_SESSION_ROUTES_CHECK_ID = "core/doctor/codex-session-routes";
const FINAL_CONFIG_VALIDATION_CHECK_ID = "core/doctor/final-config-validation";
// Check ids for the opt-in structured Gateway checks: local daemon service
// state and Gateway reachability.
const GATEWAY_DAEMON_CHECK_ID = "core/doctor/gateway-daemon";
const GATEWAY_HEALTH_CHECK_ID = "core/doctor/gateway-health";
const GATEWAY_SERVICES_EXTRA_CHECK_ID = "core/doctor/gateway-services/extra";
const SESSION_LOCKS_CHECK_ID = "core/doctor/session-locks";
@@ -72,6 +74,12 @@ export type CoreHealthCheckDeps = {
readonly collectProviderCatalogProjectionFindings: (
ctx: HealthCheckContext,
) => Promise<readonly HealthFinding[]>;
readonly collectGatewayHealthFindings: (
ctx: HealthCheckContext,
) => Promise<readonly HealthFinding[]>;
readonly collectGatewayDaemonFindings: (
ctx: HealthCheckContext,
) => Promise<readonly HealthFinding[]>;
};
async function detectUnavailableSkillsWithRuntime(
@@ -116,12 +124,28 @@ async function collectProviderCatalogProjectionFindingsWithRuntime(
return runtime.collectProviderCatalogProjectionFindings(ctx.cfg);
}
/**
 * Default dependency for the Gateway health check: loads the doctor
 * core-checks runtime module on demand and delegates to its
 * `collectGatewayHealthFindings`.
 *
 * @param ctx - Health check context, forwarded unchanged.
 * @returns The findings produced by the runtime module.
 */
async function collectGatewayHealthFindingsWithRuntime(
  ctx: HealthCheckContext,
): Promise<readonly HealthFinding[]> {
  const runtimeModule = await loadDoctorCoreChecksRuntimeModule();
  const findings = await runtimeModule.collectGatewayHealthFindings(ctx);
  return findings;
}
/**
 * Default dependency for the Gateway daemon check: loads the doctor
 * core-checks runtime module on demand and delegates to its
 * `collectGatewayDaemonFindings`.
 *
 * @param ctx - Health check context, forwarded unchanged.
 * @returns The findings produced by the runtime module.
 */
async function collectGatewayDaemonFindingsWithRuntime(
  ctx: HealthCheckContext,
): Promise<readonly HealthFinding[]> {
  const runtimeModule = await loadDoctorCoreChecksRuntimeModule();
  const findings = await runtimeModule.collectGatewayDaemonFindings(ctx);
  return findings;
}
const defaultCoreHealthCheckDeps: CoreHealthCheckDeps = {
detectUnavailableSkills: detectUnavailableSkillsWithRuntime,
collectSecurityWarnings: collectSecurityWarningsWithRuntime,
collectWorkspaceSuggestionNotes: collectWorkspaceSuggestionNotesWithRuntime,
collectRuntimeToolSchemaFindings: collectRuntimeToolSchemaFindingsWithRuntime,
collectProviderCatalogProjectionFindings: collectProviderCatalogProjectionFindingsWithRuntime,
collectGatewayHealthFindings: collectGatewayHealthFindingsWithRuntime,
collectGatewayDaemonFindings: collectGatewayDaemonFindingsWithRuntime,
};
export function configValidationIssuesToHealthFindings(
@@ -736,6 +760,32 @@ const gatewayPlatformNotesCheck: HealthCheck = {
},
};
/**
 * Builds the `core/doctor/gateway-health` check definition.
 *
 * The check is disabled by default and its `detect` step delegates to the
 * injected `collectGatewayHealthFindings` dependency.
 *
 * @param deps - Core health check dependencies; only `collectGatewayHealthFindings` is used.
 * @returns The check definition to register with the core checks.
 */
function createGatewayHealthCheck(deps: CoreHealthCheckDeps): SplitHealthCheckInput {
  const gatewayHealthCheck: SplitHealthCheckInput = {
    id: GATEWAY_HEALTH_CHECK_ID,
    kind: "core",
    description: "Gateway reachability is represented as structured findings.",
    source: "doctor",
    defaultEnabled: false,
    detect: async (ctx) => deps.collectGatewayHealthFindings(ctx),
  };
  return gatewayHealthCheck;
}
/**
 * Builds the `core/doctor/gateway-daemon` check definition.
 *
 * The check is disabled by default and its `detect` step delegates to the
 * injected `collectGatewayDaemonFindings` dependency.
 *
 * @param deps - Core health check dependencies; only `collectGatewayDaemonFindings` is used.
 * @returns The check definition to register with the core checks.
 */
function createGatewayDaemonCheck(deps: CoreHealthCheckDeps): SplitHealthCheckInput {
  const gatewayDaemonCheck: SplitHealthCheckInput = {
    id: GATEWAY_DAEMON_CHECK_ID,
    kind: "core",
    description: "Local Gateway daemon service state is represented as structured findings.",
    source: "doctor",
    defaultEnabled: false,
    detect: async (ctx) => deps.collectGatewayDaemonFindings(ctx),
  };
  return gatewayDaemonCheck;
}
const sessionLocksCheck: SplitHealthCheckInput = {
id: SESSION_LOCKS_CHECK_ID,
kind: "core",
@@ -1022,6 +1072,8 @@ function createConvertedWorkflowChecks(
uiProtocolFreshnessCheck,
gatewayServicesExtraCheck,
gatewayPlatformNotesCheck,
createGatewayHealthCheck(deps),
createGatewayDaemonCheck(deps),
createSecurityCheck(deps),
browserCheck,
openAIOAuthTlsCheck,

View File

@@ -1634,6 +1634,7 @@ export function resolveDoctorHealthContributions(): DoctorHealthContribution[] {
createDoctorHealthContribution({
id: "doctor:gateway-health",
label: "Gateway health",
healthCheckIds: ["core/doctor/gateway-health"],
run: runGatewayHealthChecks,
}),
createDoctorHealthContribution({
@@ -1654,6 +1655,7 @@ export function resolveDoctorHealthContributions(): DoctorHealthContribution[] {
createDoctorHealthContribution({
id: "doctor:gateway-daemon",
label: "Gateway daemon",
healthCheckIds: ["core/doctor/gateway-daemon"],
run: runGatewayDaemonHealth,
}),
createDoctorHealthContribution({