From 3630d502eb9be9e1be7c685efbb6cd80b445fdcc Mon Sep 17 00:00:00 2001 From: Gio Della-Libera Date: Sat, 27 Jun 2026 17:17:49 -0700 Subject: [PATCH] Doctor: expose gateway runtime findings (#97075) * feat(doctor): expose gateway runtime findings * fix(doctor): redact gateway health targets --- src/flows/doctor-core-checks.runtime.test.ts | 135 ++++++++++++++++- src/flows/doctor-core-checks.runtime.ts | 145 ++++++++++++++++++- src/flows/doctor-core-checks.test.ts | 64 ++++++++ src/flows/doctor-core-checks.ts | 52 +++++++ src/flows/doctor-health-contributions.ts | 2 + 5 files changed, 395 insertions(+), 3 deletions(-) diff --git a/src/flows/doctor-core-checks.runtime.test.ts b/src/flows/doctor-core-checks.runtime.test.ts index 0a4a15ffaeb..2af73771044 100644 --- a/src/flows/doctor-core-checks.runtime.test.ts +++ b/src/flows/doctor-core-checks.runtime.test.ts @@ -9,6 +9,10 @@ const mocks = vi.hoisted(() => ({ disposeBundleRuntime: vi.fn(), loadModelCatalog: vi.fn(async (): Promise>> => []), normalizeProviderToolSchemasWithPlugin: vi.fn(), + buildGatewayProbeConnectionDetails: vi.fn(), + probeGatewayStatus: vi.fn(), + readGatewayServiceState: vi.fn(), + resolveGatewayService: vi.fn(() => ({ label: "openclaw-gateway" })), resolvePluginProviders: vi.fn((): Array> => []), resolveDefaultModelForAgent: vi.fn(() => ({ provider: "openai", model: "gpt-5.5" })), })); @@ -35,6 +39,19 @@ vi.mock("../agents/agent-tools.js", () => ({ createOpenClawCodingTools: mocks.createOpenClawCodingTools, })); +vi.mock("../gateway/call.js", () => ({ + buildGatewayProbeConnectionDetails: mocks.buildGatewayProbeConnectionDetails, +})); + +vi.mock("../cli/daemon-cli/probe.js", () => ({ + probeGatewayStatus: mocks.probeGatewayStatus, +})); + +vi.mock("../daemon/service.js", () => ({ + readGatewayServiceState: mocks.readGatewayServiceState, + resolveGatewayService: mocks.resolveGatewayService, +})); + vi.mock("../plugins/provider-runtime.js", () => ({ inspectProviderToolSchemasWithPlugin: () => [], normalizeProviderToolSchemasWithPlugin: mocks.normalizeProviderToolSchemasWithPlugin, @@ -48,8 +65,12 @@ vi.mock("../plugins/providers.runtime.js", () => ({ resolvePluginProviders: mocks.resolvePluginProviders, })); -const { collectProviderCatalogProjectionFindings, collectRuntimeToolSchemaFindings } = - await import("./doctor-core-checks.runtime.js"); +const { + collectGatewayDaemonFindings, + collectGatewayHealthFindings, + collectProviderCatalogProjectionFindings, + collectRuntimeToolSchemaFindings, +} = await import("./doctor-core-checks.runtime.js"); function tool(name: string, parameters: unknown): AnyAgentTool { return { @@ -79,6 +100,22 @@ describe("doctor runtime tool schema checks", () => { mocks.normalizeProviderToolSchemasWithPlugin .mockReset() .mockImplementation(({ context }) => context.tools); + mocks.buildGatewayProbeConnectionDetails.mockReset().mockResolvedValue({ + url: "http://127.0.0.1:5829", + }); + mocks.probeGatewayStatus.mockReset().mockResolvedValue({ + ok: true, + server: { version: "2026.6.26" }, + }); + mocks.readGatewayServiceState.mockReset().mockResolvedValue({ + installed: true, + loaded: true, + running: true, + env: {}, + command: { programArguments: ["openclaw", "gateway"], sourcePath: "/tmp/gateway.service" }, + runtime: { status: "running" }, + }); + mocks.resolveGatewayService.mockClear(); mocks.resolvePluginProviders.mockReset().mockReturnValue([]); mocks.resolveDefaultModelForAgent.mockClear(); }); @@ -503,6 +540,100 @@ describe("doctor runtime tool schema checks", () => { }); }); +describe("doctor gateway runtime checks", () => { + beforeEach(() => { + mocks.buildGatewayProbeConnectionDetails.mockReset().mockResolvedValue({ + url: "http://127.0.0.1:5829", + }); + mocks.probeGatewayStatus.mockReset().mockResolvedValue({ + ok: true, + server: { version: "2026.6.26" }, + }); + mocks.readGatewayServiceState.mockReset().mockResolvedValue({ + installed: true, + loaded: true, + running: true, + env: {}, + command: { programArguments: ["openclaw", "gateway"], sourcePath: "/tmp/gateway.service" }, + runtime: { status: "running" }, + }); + mocks.resolveGatewayService.mockReset().mockReturnValue({ label: "openclaw-gateway" }); + }); + + it("reports unreachable gateway health probes", async () => { + mocks.probeGatewayStatus.mockResolvedValueOnce({ + ok: false, + error: "connect ECONNREFUSED 127.0.0.1:5829", + }); + + await expect( + collectGatewayHealthFindings({ cfg: { gateway: { mode: "local" } } }), + ).resolves.toContainEqual({ + checkId: "core/doctor/gateway-health", + severity: "warning", + message: "Gateway is not reachable: connect ECONNREFUSED 127.0.0.1:5829", + path: "gateway.mode", + target: "http://127.0.0.1:5829", + fixHint: + "Start the Gateway service or run `openclaw doctor --fix` for service repair prompts.", + }); + }); + + it("redacts sensitive remote gateway URLs from health finding targets", async () => { + mocks.buildGatewayProbeConnectionDetails.mockResolvedValueOnce({ + url: "wss://user:pass@gateway.example.test/rpc?token=secret&safe=value", + }); + mocks.probeGatewayStatus.mockResolvedValueOnce({ + ok: false, + error: "remote gateway did not answer", + }); + + const findings = await collectGatewayHealthFindings({ + cfg: { gateway: { mode: "remote", remote: { url: "wss://gateway.example.test/rpc" } } }, + }); + + expect(findings).toContainEqual({ + checkId: "core/doctor/gateway-health", + severity: "warning", + message: "Gateway is not reachable: remote gateway did not answer", + path: "gateway.remote.url", + target: "wss://***:***@gateway.example.test/rpc?token=***&safe=value", + fixHint: "Verify the remote Gateway URL, network path, TLS settings, and credentials.", + }); + expect(JSON.stringify(findings)).not.toContain("user:pass"); + expect(JSON.stringify(findings)).not.toContain("token=secret"); + }); + + it("reports missing local gateway daemon service", async () => { + mocks.readGatewayServiceState.mockResolvedValueOnce({ + installed: false, + loaded: false, + running: false, + env: {}, + command: null, + }); + + await expect( + collectGatewayDaemonFindings({ cfg: { gateway: { mode: "local" } } }), + ).resolves.toContainEqual({ + checkId: "core/doctor/gateway-daemon", + severity: "warning", + message: "Gateway service is not installed.", + path: "gateway.mode", + target: "openclaw-gateway", + fixHint: "Run `openclaw doctor --fix` or `openclaw gateway install` to install it.", + }); + }); + + it("skips daemon findings for remote gateway mode", async () => { + await expect( + collectGatewayDaemonFindings({ cfg: { gateway: { mode: "remote" } } }), + ).resolves.toEqual([]); + + expect(mocks.readGatewayServiceState).not.toHaveBeenCalled(); + }); +}); + describe("doctor provider catalog projection checks", () => { beforeEach(() => { mocks.resolvePluginProviders.mockReset().mockReturnValue([]); diff --git a/src/flows/doctor-core-checks.runtime.ts b/src/flows/doctor-core-checks.runtime.ts index afd2c11ed7b..b08dd5efe46 100644 --- a/src/flows/doctor-core-checks.runtime.ts +++ b/src/flows/doctor-core-checks.runtime.ts @@ -1,4 +1,5 @@ // Doctor runtime checks inspect tool names, browser residue, and runtime state. +import { redactSensitiveUrlLikeString } from "@openclaw/net-policy/redact-sensitive-url"; import { TOOL_NAME_SEPARATOR } from "../agents/agent-bundle-mcp-names.js"; import { type McpToolCatalogDiagnostic, @@ -30,20 +31,32 @@ import { type RuntimeToolSchemaDiagnostic, } from "../agents/tool-schema-projection.js"; import type { AnyAgentTool } from "../agents/tools/common.js"; +import { probeGatewayStatus } from "../cli/daemon-cli/probe.js"; import { collectUnavailableAgentSkills } from "../commands/doctor-skills-core.js"; +import { gatewayProbeResultSawGateway } from "../commands/gateway-health-auth-diagnostic.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; +import { + getSystemdCgroupHygieneSummary, + type GatewayServiceRuntime, +} from "../daemon/service-runtime.js"; +import { resolveGatewayService, readGatewayServiceState } from "../daemon/service.js"; +import { buildGatewayProbeConnectionDetails } from "../gateway/call.js"; import { formatErrorMessage } from "../infra/errors.js"; import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.types.js"; import { getPluginToolMeta, setPluginToolMeta } from "../plugins/tools.js"; import type { ProviderCatalogOrder, ProviderPlugin } from "../plugins/types.js"; import { normalizeAgentId } from "../routing/session-key.js"; import { buildWorkspaceSkillStatus, type SkillStatusEntry } from "../skills/discovery/status.js"; -import type { HealthFinding } from "./health-checks.js"; +import type { HealthCheckContext, HealthFinding } from "./health-checks.js"; type BundleMcpToolRuntime = Awaited>; const PROVIDER_CATALOG_ORDERS = ["simple", "profile", "paired", "late"] as const; const PROVIDER_CATALOG_ORDER_SET = new Set(PROVIDER_CATALOG_ORDERS); +function formatGatewayHealthTarget(url: string): string { + return redactSensitiveUrlLikeString(url); +} + export function detectUnavailableSkills(cfg: OpenClawConfig): SkillStatusEntry[] { const agentId = resolveDefaultAgentId(cfg); const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId); @@ -54,6 +67,136 @@ export function detectUnavailableSkills(cfg: OpenClawConfig): SkillStatusEntry[] return collectUnavailableAgentSkills(report); } +export async function collectGatewayHealthFindings( + ctx: Pick, +): Promise { + let probeDetails: Awaited>; + try { + probeDetails = await buildGatewayProbeConnectionDetails({ + config: ctx.cfg, + ...(ctx.configPath ? { configPath: ctx.configPath } : {}), + }); + } catch (error) { + return [ + { + checkId: "core/doctor/gateway-health", + severity: "warning", + message: `Gateway health probe could not be prepared: ${formatErrorMessage(error)}`, + path: ctx.cfg.gateway?.mode === "remote" ? "gateway.remote.url" : "gateway", + fixHint: + "Fix Gateway connection configuration, then rerun `openclaw doctor --lint --only core/doctor/gateway-health`.", + }, + ]; + } + + const probe = await probeGatewayStatus({ + url: probeDetails.url, + timeoutMs: 3000, + tlsFingerprint: probeDetails.tlsFingerprint, + preauthHandshakeTimeoutMs: probeDetails.preauthHandshakeTimeoutMs, + config: ctx.cfg, + json: true, + }); + if (gatewayProbeResultSawGateway(probe)) { + return []; + } + const mode = ctx.cfg.gateway?.mode === "remote" ? "remote" : "local"; + return [ + { + checkId: "core/doctor/gateway-health", + severity: "warning", + message: `Gateway is not reachable: ${probe.error ?? "status probe failed"}`, + path: mode === "remote" ? "gateway.remote.url" : "gateway.mode", + target: formatGatewayHealthTarget(probeDetails.url), + fixHint: + mode === "remote" + ? "Verify the remote Gateway URL, network path, TLS settings, and credentials." + : "Start the Gateway service or run `openclaw doctor --fix` for service repair prompts.", + }, + ]; +} + +function gatewayRuntimeStatus(runtime: GatewayServiceRuntime | undefined): string | undefined { + return runtime?.status ?? runtime?.state ?? runtime?.subState; +} + +export async function collectGatewayDaemonFindings( + ctx: Pick, +): Promise { + if (ctx.cfg.gateway?.mode === "remote") { + return []; + } + const service = resolveGatewayService(); + const state = await readGatewayServiceState(service, { env: process.env }); + const findings: HealthFinding[] = []; + if (!state.installed) { + findings.push({ + checkId: "core/doctor/gateway-daemon", + severity: "warning", + message: "Gateway service is not installed.", + path: "gateway.mode", + target: service.label, + fixHint: "Run `openclaw doctor --fix` or `openclaw gateway install` to install it.", + }); + return findings; + } + if (!state.loaded) { + findings.push({ + checkId: "core/doctor/gateway-daemon", + severity: "warning", + message: "Gateway service is installed but not loaded.", + path: state.command?.sourcePath, + target: service.label, + fixHint: "Run `openclaw doctor --fix` or `openclaw gateway start` to load it.", + }); + } + const status = gatewayRuntimeStatus(state.runtime); + if (state.loaded && !state.running) { + findings.push({ + checkId: "core/doctor/gateway-daemon", + severity: "warning", + message: status + ? `Gateway service runtime is ${status}, not running.` + : "Gateway service is loaded but runtime status could not confirm it is running.", + path: state.command?.sourcePath, + target: service.label, + fixHint: "Run `openclaw gateway status --deep` or `openclaw doctor --fix` for repair hints.", + }); + } + if (state.runtime?.missingGuiSession) { + findings.push({ + checkId: "core/doctor/gateway-daemon", + severity: "warning", + message: "Gateway service cannot attach to the user GUI session.", + path: state.command?.sourcePath, + target: service.label, + fixHint: state.runtime.detail ?? "Log into a GUI session, then rerun doctor.", + }); + } + if (state.runtime?.missingSupervision || state.runtime?.missingUnit) { + findings.push({ + checkId: "core/doctor/gateway-daemon", + severity: "warning", + message: "Gateway service supervision metadata is missing.", + path: state.command?.sourcePath, + target: service.label, + fixHint: state.runtime.detail ?? "Reinstall or reload the Gateway service.", + }); + } + const hygiene = getSystemdCgroupHygieneSummary(state.runtime?.systemd); + if (hygiene) { + findings.push({ + checkId: "core/doctor/gateway-daemon", + severity: "warning", + message: `Gateway systemd service has risky ${hygiene}.`, + path: state.command?.sourcePath, + target: service.label, + fixHint: "Repair the systemd unit so stale child processes are cleaned up reliably.", + }); + } + return findings; +} + function providerCatalogPath(pluginId: string | undefined): string | undefined { return pluginId ? `plugins.entries.${pluginId}` : undefined; } diff --git a/src/flows/doctor-core-checks.test.ts b/src/flows/doctor-core-checks.test.ts index f6ca7327cb8..0eed125eb14 100644 --- a/src/flows/doctor-core-checks.test.ts +++ b/src/flows/doctor-core-checks.test.ts @@ -95,6 +95,12 @@ function createDeps(overrides: Partial = {}): CoreHealthChe async collectProviderCatalogProjectionFindings() { return []; }, + async collectGatewayHealthFindings() { + return []; + }, + async collectGatewayDaemonFindings() { + return []; + }, ...overrides, }; } @@ -231,6 +237,64 @@ describe("CORE_HEALTH_CHECKS", () => { ); }); + it("exposes gateway health findings as an opt-in structured check", async () => { + const findings: HealthFinding[] = [ + { + checkId: "core/doctor/gateway-health", + severity: "warning", + message: "Gateway is not reachable.", + }, + ]; + const collectGatewayHealthFindings = vi.fn(async () => findings); + const check = getCheck( + createCoreHealthChecks( + createDeps({ + collectGatewayHealthFindings, + }), + ), + "core/doctor/gateway-health", + ); + const ctx = { + mode: "lint" as const, + runtime, + cfg: { gateway: { mode: "local" as const } }, + }; + + await expect(check.detect(ctx)).resolves.toBe(findings); + + expect(collectGatewayHealthFindings).toHaveBeenCalledWith(ctx); + expect((check as { defaultEnabled?: boolean }).defaultEnabled).toBe(false); + }); + + it("exposes gateway daemon findings as an opt-in structured check", async () => { + const findings: HealthFinding[] = [ + { + checkId: "core/doctor/gateway-daemon", + severity: "warning", + message: "Gateway service is not installed.", + }, + ]; + const collectGatewayDaemonFindings = vi.fn(async () => findings); + const check = getCheck( + createCoreHealthChecks( + createDeps({ + collectGatewayDaemonFindings, + }), + ), + "core/doctor/gateway-daemon", + ); + const ctx = { + mode: "lint" as const, + runtime, + cfg: { gateway: { mode: "local" as const } }, + }; + + await expect(check.detect(ctx)).resolves.toBe(findings); + + expect(collectGatewayDaemonFindings).toHaveBeenCalledWith(ctx); + expect((check as { defaultEnabled?: boolean }).defaultEnabled).toBe(false); + }); + it("converts unavailable skills into repair-capable health findings", async () => { const unavailableSkill = createSkill(); const cfg: OpenClawConfig = { diff --git a/src/flows/doctor-core-checks.ts b/src/flows/doctor-core-checks.ts index b5ec92514df..f3517cef147 100644 --- a/src/flows/doctor-core-checks.ts +++ b/src/flows/doctor-core-checks.ts @@ -47,6 +47,8 @@ import type { const BROWSER_CLAWD_PROFILE_RESIDUE_CHECK_ID = "core/doctor/browser-clawd-profile-residue"; const CODEX_SESSION_ROUTES_CHECK_ID = "core/doctor/codex-session-routes"; const FINAL_CONFIG_VALIDATION_CHECK_ID = "core/doctor/final-config-validation"; +const GATEWAY_DAEMON_CHECK_ID = "core/doctor/gateway-daemon"; +const GATEWAY_HEALTH_CHECK_ID = "core/doctor/gateway-health"; const GATEWAY_SERVICES_EXTRA_CHECK_ID = "core/doctor/gateway-services/extra"; const SESSION_LOCKS_CHECK_ID = "core/doctor/session-locks"; @@ -72,6 +74,12 @@ export type CoreHealthCheckDeps = { readonly collectProviderCatalogProjectionFindings: ( ctx: HealthCheckContext, ) => Promise; + readonly collectGatewayHealthFindings: ( + ctx: HealthCheckContext, + ) => Promise; + readonly collectGatewayDaemonFindings: ( + ctx: HealthCheckContext, + ) => Promise; }; async function detectUnavailableSkillsWithRuntime( @@ -116,12 +124,28 @@ async function collectProviderCatalogProjectionFindingsWithRuntime( return runtime.collectProviderCatalogProjectionFindings(ctx.cfg); } +async function collectGatewayHealthFindingsWithRuntime( + ctx: HealthCheckContext, +): Promise { + const runtime = await loadDoctorCoreChecksRuntimeModule(); + return runtime.collectGatewayHealthFindings(ctx); +} + +async function collectGatewayDaemonFindingsWithRuntime( + ctx: HealthCheckContext, +): Promise { + const runtime = await loadDoctorCoreChecksRuntimeModule(); + return runtime.collectGatewayDaemonFindings(ctx); +} + const defaultCoreHealthCheckDeps: CoreHealthCheckDeps = { detectUnavailableSkills: detectUnavailableSkillsWithRuntime, collectSecurityWarnings: collectSecurityWarningsWithRuntime, collectWorkspaceSuggestionNotes: collectWorkspaceSuggestionNotesWithRuntime, collectRuntimeToolSchemaFindings: collectRuntimeToolSchemaFindingsWithRuntime, collectProviderCatalogProjectionFindings: collectProviderCatalogProjectionFindingsWithRuntime, + collectGatewayHealthFindings: collectGatewayHealthFindingsWithRuntime, + collectGatewayDaemonFindings: collectGatewayDaemonFindingsWithRuntime, }; export function configValidationIssuesToHealthFindings( @@ -736,6 +760,32 @@ const gatewayPlatformNotesCheck: HealthCheck = { }, }; +function createGatewayHealthCheck(deps: CoreHealthCheckDeps): SplitHealthCheckInput { + return { + id: GATEWAY_HEALTH_CHECK_ID, + kind: "core", + description: "Gateway reachability is represented as structured findings.", + source: "doctor", + defaultEnabled: false, + async detect(ctx) { + return deps.collectGatewayHealthFindings(ctx); + }, + }; +} + +function createGatewayDaemonCheck(deps: CoreHealthCheckDeps): SplitHealthCheckInput { + return { + id: GATEWAY_DAEMON_CHECK_ID, + kind: "core", + description: "Local Gateway daemon service state is represented as structured findings.", + source: "doctor", + defaultEnabled: false, + async detect(ctx) { + return deps.collectGatewayDaemonFindings(ctx); + }, + }; +} + const sessionLocksCheck: SplitHealthCheckInput = { id: SESSION_LOCKS_CHECK_ID, kind: "core", @@ -1022,6 +1072,8 @@ function createConvertedWorkflowChecks( uiProtocolFreshnessCheck, gatewayServicesExtraCheck, gatewayPlatformNotesCheck, + createGatewayHealthCheck(deps), + createGatewayDaemonCheck(deps), createSecurityCheck(deps), browserCheck, openAIOAuthTlsCheck, diff --git a/src/flows/doctor-health-contributions.ts b/src/flows/doctor-health-contributions.ts index 8bfc2044f5d..98397d2d8b4 100644 --- a/src/flows/doctor-health-contributions.ts +++ b/src/flows/doctor-health-contributions.ts @@ -1634,6 +1634,7 @@ export function resolveDoctorHealthContributions(): DoctorHealthContribution[] { createDoctorHealthContribution({ id: "doctor:gateway-health", label: "Gateway health", + healthCheckIds: ["core/doctor/gateway-health"], run: runGatewayHealthChecks, }), createDoctorHealthContribution({ @@ -1654,6 +1655,7 @@ export function resolveDoctorHealthContributions(): DoctorHealthContribution[] { createDoctorHealthContribution({ id: "doctor:gateway-daemon", label: "Gateway daemon", + healthCheckIds: ["core/doctor/gateway-daemon"], run: runGatewayDaemonHealth, }), createDoctorHealthContribution({