diff --git a/CHANGELOG.md b/CHANGELOG.md index 341e2760a56..d6e771475dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Agents/sessions: preserve terminal lifecycle state when final run metadata persists from a stale in-memory snapshot, preventing `main` sessions from staying stuck as running after completed or timed-out turns. +- Gateway/CLI: make `openclaw gateway start` repair stale managed service definitions that point at old OpenClaw versions, missing binaries, or temporary installer paths before starting. - Status: show the `openai-codex` OAuth profile for `openai/gpt-*` sessions running through the native Codex runtime instead of reporting auth as unknown. (#76197) Thanks @mbelinky. - Plugins/externalization: keep diagnostics ClawHub packages and persisted bundled-plugin relocation on npm-first install metadata for launch, and omit Discord from the core package now that its external package is published. Thanks @vincentkoc. - Plugins/Codex: allow the official npm Codex plugin to install without the unsafe-install override, keep `/codex` command ownership, and cover the real npm Docker live path through managed `.openclaw/npm` dependencies plus uninstall failure proof. 
diff --git a/src/cli/daemon-cli/install.ts b/src/cli/daemon-cli/install.ts index 62fbabae874..d31f14c08fa 100644 --- a/src/cli/daemon-cli/install.ts +++ b/src/cli/daemon-cli/install.ts @@ -31,7 +31,7 @@ import { } from "./shared.js"; import type { DaemonInstallOptions } from "./types.js"; -function mergeInstallInvocationEnv(params: { +export function mergeInstallInvocationEnv(params: { env: NodeJS.ProcessEnv; existingServiceEnv?: Record; }): NodeJS.ProcessEnv { diff --git a/src/cli/daemon-cli/lifecycle-core.test.ts b/src/cli/daemon-cli/lifecycle-core.test.ts index ba132e936b7..74a5c3d5604 100644 --- a/src/cli/daemon-cli/lifecycle-core.test.ts +++ b/src/cli/daemon-cli/lifecycle-core.test.ts @@ -353,6 +353,55 @@ describe("runServiceRestart token drift", () => { expect(payload.message).toBe("restart scheduled, gateway will restart momentarily"); }); + it("repairs stale loaded services during start before reporting success", async () => { + service.readCommand.mockResolvedValue({ + programArguments: ["openclaw", "gateway"], + environment: { OPENCLAW_SERVICE_VERSION: "2026.4.24" }, + }); + const repairLoadedService = vi.fn(async () => ({ + result: "started" as const, + message: "Gateway service definition repaired and started.", + warnings: ["service was installed by OpenClaw 2026.4.24, current CLI is 2026.5.2"], + loaded: true, + })); + + await runServiceStart({ + serviceNoun: "Gateway", + service, + renderStartHints: () => [], + opts: { json: true }, + repairLoadedService, + }); + + expect(repairLoadedService).toHaveBeenCalledTimes(1); + expect(service.restart).not.toHaveBeenCalled(); + const payload = readJsonLog<{ + result?: string; + message?: string; + warnings?: string[]; + service?: { loaded?: boolean }; + }>(); + expect(payload.result).toBe("started"); + expect(payload.message).toBe("Gateway service definition repaired and started."); + expect(payload.warnings?.[0]).toContain("service was installed by OpenClaw"); + expect(payload.service?.loaded).toBe(true); + 
}); + + it("fails start with an install hint when a stale loaded service has no repair callback", async () => { + service.readCommand.mockResolvedValue({ + programArguments: ["openclaw", "gateway"], + environment: { OPENCLAW_SERVICE_VERSION: "2026.4.24" }, + }); + + await expect(runServiceStart(createServiceRunArgs())).rejects.toThrow("__exit__:1"); + + const payload = readJsonLog<{ ok?: boolean; error?: string; hints?: string[] }>(); + expect(payload.ok).toBe(false); + expect(payload.error).toContain("service needs repair"); + expect(payload.hints).toEqual(["openclaw gateway install --force"]); + expect(service.restart).not.toHaveBeenCalled(); + }); + it("fails start when restarting a stopped installed service errors", async () => { service.isLoaded.mockResolvedValue(false); service.restart.mockRejectedValue(new Error("launchctl kickstart failed: permission denied")); diff --git a/src/cli/daemon-cli/lifecycle-core.ts b/src/cli/daemon-cli/lifecycle-core.ts index ff5ad322ddb..9bd867f7ea4 100644 --- a/src/cli/daemon-cli/lifecycle-core.ts +++ b/src/cli/daemon-cli/lifecycle-core.ts @@ -5,6 +5,7 @@ import { formatConfigIssueLines } from "../../config/issue-format.js"; import { resolveIsNixMode } from "../../config/paths.js"; import { checkTokenDrift } from "../../daemon/service-audit.js"; import type { GatewayServiceRestartResult } from "../../daemon/service-types.js"; +import type { GatewayServiceStartRepairIssue, GatewayServiceState } from "../../daemon/service.js"; import { describeGatewayServiceRestart, startGatewayService } from "../../daemon/service.js"; import type { GatewayService } from "../../daemon/service.js"; import { renderSystemdUnavailableHints } from "../../daemon/systemd-hints.js"; @@ -16,6 +17,7 @@ import { } from "../../infra/restart.js"; import { isWSL } from "../../infra/wsl.js"; import { defaultRuntime } from "../../runtime.js"; +import { formatCliCommand } from "../command-format.js"; import { resolveGatewayTokenForDriftCheck } from 
"./gateway-token-drift.js"; import { buildDaemonServiceSnapshot, @@ -48,6 +50,11 @@ type ServiceRecoveryContext = { fail: (message: string, hints?: string[]) => void; }; +type ServiceStartRepairContext = ServiceRecoveryContext & { + state: GatewayServiceState; + issues: GatewayServiceStartRepairIssue[]; +}; + async function maybeAugmentSystemdHints(hints: string[]): Promise { if (process.platform !== "linux") { return hints; @@ -221,6 +228,7 @@ export async function runServiceStart(params: { renderStartHints: () => string[]; opts?: DaemonLifecycleOptions; onNotLoaded?: (ctx: ServiceRecoveryContext) => Promise; + repairLoadedService?: (ctx: ServiceStartRepairContext) => Promise; }) { const json = Boolean(params.opts?.json); const { stdout, emit, fail } = createDaemonActionContext({ action: "start", json }); @@ -298,6 +306,41 @@ export async function runServiceStart(params: { }); return; } + if (startResult.outcome === "repair-required") { + try { + const handled = await params.repairLoadedService?.({ + json, + stdout, + fail, + state: startResult.state, + issues: startResult.issues, + }); + if (handled) { + emit({ + ok: true, + result: handled.result, + message: handled.message, + warnings: handled.warnings, + service: buildDaemonServiceSnapshot(params.service, handled.loaded ?? 
true), + }); + if (!json && handled.message) { + defaultRuntime.log(handled.message); + } + return; + } + } catch (err) { + const hints = params.renderStartHints(); + fail(`${params.serviceNoun} repair failed: ${String(err)}`, hints); + return; + } + fail( + `${params.serviceNoun} service needs repair before it can start: ${startResult.issues + .map((issue) => issue.message) + .join("; ")}`, + [formatCliCommand("openclaw gateway install --force")], + ); + return; + } emit({ ok: true, result: "started", diff --git a/src/cli/daemon-cli/lifecycle.test.ts b/src/cli/daemon-cli/lifecycle.test.ts index df625839b61..bd5cb7f3507 100644 --- a/src/cli/daemon-cli/lifecycle.test.ts +++ b/src/cli/daemon-cli/lifecycle.test.ts @@ -52,6 +52,7 @@ const probeGateway = vi.fn< const isRestartEnabled = vi.fn<(config?: { commands?: unknown }) => boolean>(() => true); const loadConfig = vi.hoisted(() => vi.fn(() => ({}))); const recoverInstalledLaunchAgent = vi.hoisted(() => vi.fn()); +const repairLoadedGatewayServiceForStart = vi.hoisted(() => vi.fn()); vi.mock("../../config/config.js", () => ({ getRuntimeConfig: () => loadConfig(), @@ -89,6 +90,10 @@ vi.mock("./launchd-recovery.js", () => ({ recoverInstalledLaunchAgent(args), })); +vi.mock("./start-repair.js", () => ({ + repairLoadedGatewayServiceForStart: (args: unknown) => repairLoadedGatewayServiceForStart(args), +})); + vi.mock("./restart-health.js", () => ({ DEFAULT_RESTART_HEALTH_ATTEMPTS: 120, DEFAULT_RESTART_HEALTH_DELAY_MS: 500, @@ -160,6 +165,7 @@ describe("runDaemonRestart health checks", () => { isRestartEnabled.mockReset(); loadConfig.mockReset(); recoverInstalledLaunchAgent.mockReset(); + repairLoadedGatewayServiceForStart.mockReset(); service.readCommand.mockResolvedValue({ programArguments: ["openclaw", "gateway", "--port", "18789"], @@ -224,6 +230,46 @@ describe("runDaemonRestart health checks", () => { expect(recoverInstalledLaunchAgent).toHaveBeenCalledWith({ result: "started" }); }); + it("repairs stale loaded 
service definitions from gateway start", async () => { + repairLoadedGatewayServiceForStart.mockResolvedValue({ + result: "started", + message: "Gateway service definition repaired and started.", + loaded: true, + }); + runServiceStart.mockImplementation( + async (params: { + repairLoadedService?: (args: { + json: boolean; + stdout: NodeJS.WritableStream; + state: unknown; + issues: unknown[]; + }) => Promise; + }) => { + await params.repairLoadedService?.({ + json: true, + stdout: process.stdout, + state: { command: { environment: { OPENCLAW_SERVICE_VERSION: "2026.4.24" } } }, + issues: [{ code: "version-mismatch", message: "old service" }], + }); + }, + ); + + await runDaemonStart({ json: true }); + + expect(repairLoadedGatewayServiceForStart).toHaveBeenCalledWith( + expect.objectContaining({ + service, + json: true, + state: expect.objectContaining({ + command: expect.objectContaining({ + environment: { OPENCLAW_SERVICE_VERSION: "2026.4.24" }, + }), + }), + issues: [expect.objectContaining({ code: "version-mismatch" })], + }), + ); + }); + it("kills stale gateway pids and retries restart", async () => { const unhealthy: RestartHealthSnapshot = { healthy: false, diff --git a/src/cli/daemon-cli/lifecycle.ts b/src/cli/daemon-cli/lifecycle.ts index e0a9ce80daf..39d24aacc33 100644 --- a/src/cli/daemon-cli/lifecycle.ts +++ b/src/cli/daemon-cli/lifecycle.ts @@ -29,6 +29,7 @@ import { waitForGatewayHealthyRestart, } from "./restart-health.js"; import { parsePortFromArgs, renderGatewayServiceStartHints } from "./shared.js"; +import { repairLoadedGatewayServiceForStart } from "./start-repair.js"; import type { DaemonLifecycleOptions } from "./types.js"; const POST_RESTART_HEALTH_ATTEMPTS = DEFAULT_RESTART_HEALTH_ATTEMPTS; @@ -150,14 +151,23 @@ export async function runDaemonUninstall(opts: DaemonLifecycleOptions = {}) { } export async function runDaemonStart(opts: DaemonLifecycleOptions = {}) { + const service = resolveGatewayService(); return await runServiceStart({ 
/**
 * Rebuilds and reinstalls the gateway service definition for a loaded service
 * that was flagged as needing repair during `start`.
 *
 * Steps, in order: read the config snapshot, merge the current process
 * environment with the environment recorded on the existing service command,
 * resolve the wrapper path, port and install token, build a new install plan,
 * and hand that plan to `service.install`.
 *
 * @param params.service - Service backend used for `install` and `isLoaded`.
 * @param params.state - State read before the start attempt; only
 *   `state.command?.environment` is consulted here.
 * @param params.issues - Repair issues; their formatted text becomes the first warning.
 * @param params.json - When true, nothing is written through `defaultRuntime.log`.
 * @param params.stdout - Stream forwarded to `service.install`.
 * @returns A `"started"` result carrying the collected warnings (omitted when
 *   there are none) and the loaded flag observed after the install.
 * @throws Error when token resolution reports an `unavailableReason`; errors
 *   from the awaited helpers and from `service.install` propagate unchanged.
 */
export async function repairLoadedGatewayServiceForStart(params: {
  service: GatewayService;
  state: GatewayServiceState;
  issues: GatewayServiceStartRepairIssue[];
  json: boolean;
  stdout: NodeJS.WritableStream;
}): Promise<{ result: "started"; message: string; warnings?: string[]; loaded: boolean }> {
  // Human-readable bullet output is suppressed entirely in JSON mode.
  const logBullet = (text: string): void => {
    if (!params.json) {
      defaultRuntime.log(`- ${text}`);
    }
  };

  const configRead = await readConfigFileSnapshotForWrite();
  const configSnapshot = configRead.snapshot;
  const cfg = configSnapshot.valid ? configSnapshot.sourceConfig : configSnapshot.config;

  const existingEnvironment = params.state.command?.environment;
  const installEnv = mergeInstallInvocationEnv({
    env: process.env,
    existingServiceEnv: existingEnvironment,
  });
  const wrapperPath = await resolveOpenClawWrapperPath(installEnv[OPENCLAW_WRAPPER_ENV_KEY]);
  const port = resolveGatewayPort(cfg);

  const tokenResolution = await resolveGatewayInstallToken({
    config: cfg,
    configSnapshot,
    configWriteOptions: configRead.writeOptions,
    env: installEnv,
    autoGenerateWhenMissing: true,
    persistGeneratedToken: true,
  });
  if (tokenResolution.unavailableReason) {
    throw new Error(tokenResolution.unavailableReason);
  }

  // Seed the warning list with the repair reasons, then any token warnings;
  // blank entries (e.g. an empty issue list) are dropped.
  const warnings: string[] = [];
  for (const entry of [
    formatGatewayServiceStartRepairIssues(params.issues),
    ...tokenResolution.warnings,
  ]) {
    if (entry.trim().length > 0) {
      warnings.push(entry);
    }
  }
  if (!params.json) {
    defaultRuntime.log("Gateway service definition needs repair:");
    warnings.forEach((entry) => logBullet(entry));
  }

  const plan = await buildGatewayInstallPlan({
    env: installEnv,
    port,
    runtime: DEFAULT_GATEWAY_DAEMON_RUNTIME,
    wrapperPath,
    existingEnvironment,
    config: cfg,
    // Plan-time warnings are both returned to the caller and echoed as bullets.
    warn: (message) => {
      warnings.push(message);
      logBullet(message);
    },
  });

  await params.service.install({
    env: installEnv as GatewayServiceEnv,
    stdout: params.stdout,
    programArguments: plan.programArguments,
    workingDirectory: plan.workingDirectory,
    environment: plan.environment,
  });

  // Best effort: if the loaded state cannot be read after the install, the
  // service is reported as loaded.
  const loaded = await (async (): Promise<boolean> => {
    try {
      return await params.service.isLoaded({ env: installEnv });
    } catch {
      return true;
    }
  })();

  return {
    result: "started",
    message: "Gateway service definition repaired and started.",
    warnings: warnings.length > 0 ? warnings : undefined,
    loaded,
  };
}
(result.outcome === "repair-required") { + expect(formatGatewayServiceStartRepairIssues(result.issues)).toContain( + "service was installed by OpenClaw 2026.4.24", + ); + } + expect(service.restart).not.toHaveBeenCalled(); + }); + + it("requests repair before start when the loaded service points at temporary install paths", async () => { + const service = createService({ + readCommand: vi.fn(async () => ({ + programArguments: [ + "/private/tmp/openclaw-ai-install-cli-pr118/tools/node/bin/node", + "/tmp/openclaw-ai-install-cli-pr118/lib/node_modules/openclaw/dist/index.js", + "gateway", + ], + environment: {}, + })), + isLoaded: vi.fn(async () => true), + }); + + const result = await startGatewayService(service, { + env: {}, + stdout: process.stdout, + }); + + expect(result.outcome).toBe("repair-required"); + if (result.outcome === "repair-required") { + expect(result.issues.map((issue) => issue.code)).toContain("temporary-program"); + } + expect(service.restart).not.toHaveBeenCalled(); + }); + it("falls back to missing-install when restart fails and install artifacts are gone", async () => { const readCommand = vi .fn() diff --git a/src/daemon/service.ts b/src/daemon/service.ts index d89b24343af..6cfb79e5816 100644 --- a/src/daemon/service.ts +++ b/src/daemon/service.ts @@ -1,4 +1,8 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js"; +import { VERSION } from "../version.js"; import { assertFutureConfigActionAllowed } from "./future-config-guard.js"; import { installLaunchAgent, @@ -29,6 +33,7 @@ import type { GatewayServiceInstallArgs, GatewayServiceManageArgs, GatewayServiceRestartResult, + GatewayServiceStartRepairIssue, GatewayServiceStartResult, GatewayServiceStageArgs, GatewayServiceState, @@ -51,6 +56,7 @@ export type { GatewayServiceInstallArgs, GatewayServiceManageArgs, GatewayServiceRestartResult, + GatewayServiceStartRepairIssue, 
/**
 * Absolute directory roots treated as temporary install locations: the
 * platform temp directory reported by `os.tmpdir()` plus a few well-known
 * temp directories. Entries are resolved and de-duplicated once at module load.
 */
const TEMP_PROGRAM_ROOTS = [
  ...new Set(
    [os.tmpdir(), "/tmp", "/private/tmp", "/var/tmp"].map((entry) => path.resolve(entry)),
  ),
];

/**
 * Number of leading `programArguments` entries inspected as program paths.
 * NOTE(review): presumably these are the runtime binary and the entry script,
 * with later entries being plain CLI arguments — confirm against the installer.
 */
const PROGRAM_PATH_ARGUMENT_COUNT = 2;

/**
 * Reports whether `candidate` is `parent` itself or lies somewhere beneath it.
 *
 * Uses `path.relative` rather than a string-prefix comparison so that a
 * parent that is the filesystem root, or that carries a trailing separator,
 * still matches its children.
 *
 * @param candidate - Path to classify.
 * @param parent - Directory that may contain `candidate`.
 * @returns `true` when `candidate` equals or is nested under `parent`.
 */
function pathIsSameOrChild(candidate: string, parent: string): boolean {
  const relative = path.relative(parent, candidate);
  if (relative === "") {
    return true;
  }
  // An absolute result means the two paths share no common root.
  if (path.isAbsolute(relative)) {
    return false;
  }
  // Only a leading ".." path segment escapes the parent; names that merely
  // begin with two dots (e.g. "..cache") are ordinary children.
  return relative !== ".." && !relative.startsWith(`..${path.sep}`);
}

/**
 * Reports whether `value` is an absolute path located under one of the
 * temporary roots. Missing and non-absolute values are never temporary.
 */
function isTemporaryProgramPath(value: string | undefined): boolean {
  if (!value || !path.isAbsolute(value)) {
    return false;
  }
  const resolved = path.resolve(value);
  return TEMP_PROGRAM_ROOTS.some((root) => pathIsSameOrChild(resolved, root));
}

/**
 * Reports whether `value` is an absolute path that does not exist on disk.
 * Missing and non-absolute values (e.g. a bare command name) are never
 * reported as missing because they are not checked against the filesystem.
 */
function isMissingProgramPath(value: string | undefined): boolean {
  if (!value || !path.isAbsolute(value)) {
    return false;
  }
  return !fs.existsSync(value);
}

/**
 * Collects the reasons a loaded service definition should be repaired before
 * it is started.
 *
 * @param state - Service state; nothing is reported unless it is loaded and
 *   has a command.
 * @returns One issue when the recorded `OPENCLAW_SERVICE_VERSION` differs from
 *   the current `VERSION`, plus at most one issue per inspected program path
 *   (temporary location takes precedence over missing-on-disk).
 */
function collectGatewayServiceStartRepairIssues(
  state: GatewayServiceState,
): GatewayServiceStartRepairIssue[] {
  const command = state.command;
  if (!state.loaded || !command) {
    return [];
  }
  const issues: GatewayServiceStartRepairIssue[] = [];
  const serviceVersion = command.environment?.OPENCLAW_SERVICE_VERSION?.trim();
  if (serviceVersion && serviceVersion !== VERSION) {
    issues.push({
      code: "version-mismatch",
      message: `service was installed by OpenClaw ${serviceVersion}, current CLI is ${VERSION}`,
    });
  }
  for (const candidate of command.programArguments.slice(0, PROGRAM_PATH_ARGUMENT_COUNT)) {
    if (isTemporaryProgramPath(candidate)) {
      issues.push({
        code: "temporary-program",
        message: `service command points at a temporary path: ${candidate}`,
      });
    } else if (isMissingProgramPath(candidate)) {
      issues.push({
        code: "missing-program",
        message: `service command points at a missing path: ${candidate}`,
      });
    }
  }
  return issues;
}

/**
 * Joins repair issue messages into a single `"; "`-separated line.
 *
 * @param issues - Issues to describe.
 * @returns The joined messages, or an empty string for an empty list.
 */
export function formatGatewayServiceStartRepairIssues(
  issues: GatewayServiceStartRepairIssue[],
): string {
  return issues.map((issue) => issue.message).join("; ");
}
async function readGatewayServiceState( service: GatewayService, args: GatewayServiceEnvArgs = {}, @@ -124,6 +192,15 @@ export async function startGatewayService( }; } + const repairIssues = collectGatewayServiceStartRepairIssues(state); + if (repairIssues.length > 0) { + return { + outcome: "repair-required", + state, + issues: repairIssues, + }; + } + try { const restartResult = await service.restart({ ...args, env: state.env }); const nextState = await readGatewayServiceState(service, { env: state.env });