fix(gateway): keep container restarts in-process

This commit is contained in:
Peter Steinberger
2026-04-28 06:30:04 +01:00
parent a3fd97570f
commit 833654586e
5 changed files with 90 additions and 55 deletions

View File

@@ -60,6 +60,7 @@ Docs: https://docs.openclaw.ai
- Channels/Telegram: keep Bot API network fallbacks sticky after failed attempts and retry timed-out startup control calls once on the fallback route, so `deleteWebhook` IPv6 stalls no longer trigger slow multi-account retry storms. Fixes #73255. Thanks @ttomiczek and @sktbrd.
- Gateway/models: merge explicit `models.providers.*.models` rows into the Gateway model catalog with normalized provider/model dedupe, and use normalized image-capability lookup so custom vision models keep native image attachments even when Pi discovery omits them or model ID casing differs. Fixes #64213 and #65165. Thanks @billonese and @202233a.
- Gateway/reload: publish canonical post-write source config to in-process reloaders so simple config saves no longer create phantom plugin diffs or trigger unnecessary Gateway restarts. (#73267) Thanks @szsip239.
- Gateway/Docker: keep config-triggered restarts in-process inside containers instead of spawning a detached child and exiting PID 1 cleanly, so Docker Swarm and other on-failure supervisors do not leave the service stuck at 0/1 replicas. Fixes #73178. Thanks @du-nguyen-IT007.
- CLI/tasks: ship the task-registry control runtime in npm packages so `openclaw tasks cancel` can load ACP/subagent cancellation helpers from published builds. Fixes #68997. Thanks @1OAKDesign.
- Channels/Telegram: preserve unsent generated media after partial reply streaming has already delivered the text, so `image_generate` outputs still reach Telegram as photos instead of being dropped from the final payload. Fixes #73253. Thanks @mlaihk.
- Export/session: keep inline export HTML scripts and vendor libraries injected after template formatting so generated session exports open with the app code, markdown renderer, and syntax highlighter present. Fixes #41862 and #49957; carries forward #41861 and #68947. Thanks @briannewman, @martenzi, and @armanddp.

View File

@@ -1,7 +1,10 @@
import fs from "node:fs";
import type { IncomingMessage } from "node:http";
import net from "node:net";
import type { GatewayBindMode } from "../config/types.gateway.js";
import {
__resetContainerEnvironmentCacheForTest,
isContainerEnvironment,
} from "../infra/container-environment.js";
import {
pickMatchingExternalInterfaceAddress,
readNetworkInterfaces,
@@ -228,60 +231,10 @@ export function isLocalGatewayAddress(ip: string | undefined): boolean {
return false;
}
// Memoized detection result; `undefined` means "not probed yet".
let _containerCacheResult: boolean | undefined;

/**
 * Report whether the current process appears to be running inside a
 * container (Docker, Podman, or Kubernetes).
 *
 * The actual probing is delegated to `detectContainerEnvironment()`, which
 * relies on two heuristics:
 * 1. Well-known container sentinel files such as `/.dockerenv` (Docker) or
 *    `/run/.containerenv` (Podman).
 * 2. Container-related cgroup entries in `/proc/1/cgroup`
 *    (covers Docker, containerd, and Kubernetes pods).
 *
 * The answer is computed on the first call and then served from the cache,
 * so the filesystem is touched at most once per process lifetime unless the
 * cache is cleared by the test-only reset helper.
 *
 * @returns `true` when a container signal was found, `false` otherwise.
 */
export function isContainerEnvironment(): boolean {
  if (_containerCacheResult === undefined) {
    // First call (or cache was reset): run the filesystem probes once.
    _containerCacheResult = detectContainerEnvironment();
  }
  return _containerCacheResult;
}
/**
 * Probe the filesystem for signs that this process runs inside a container.
 *
 * Performs uncached, synchronous filesystem access on every call; callers
 * should normally go through `isContainerEnvironment()` instead.
 *
 * @returns `true` if a sentinel file exists or `/proc/1/cgroup` contains a
 *          recognized container marker, otherwise `false`.
 */
function detectContainerEnvironment(): boolean {
  // accessSync throws when the path is missing or unreachable; treat any
  // failure as "sentinel not present".
  const sentinelExists = (candidate: string): boolean => {
    try {
      fs.accessSync(candidate, fs.constants.F_OK);
      return true;
    } catch {
      return false;
    }
  };

  // 1. Docker/Podman sentinel files, checked in order with short-circuiting.
  if (
    sentinelExists("/.dockerenv") ||
    sentinelExists("/run/.containerenv") ||
    sentinelExists("/var/run/.containerenv")
  ) {
    return true;
  }

  // 2. /proc/1/cgroup mentions docker, containerd, kubepods, or lxc.
  //    The pattern covers both cgroup v1 (/docker/<id>, /kubepods/...) and
  //    cgroup v2 (kubepods.slice, cri-containerd-<id>.scope) path formats.
  try {
    const initCgroup = fs.readFileSync("/proc/1/cgroup", "utf8");
    return /\/docker\/|cri-containerd-[0-9a-f]|containerd\/[0-9a-f]{64}|\/kubepods[/.]|\blxc\b/.test(
      initCgroup,
    );
  } catch {
    // /proc may not exist (macOS, Windows) — not a container.
    return false;
  }
}
/**
 * Clear the memoized container-detection result so that the next
 * `isContainerEnvironment()` call runs detection again.
 *
 * @internal Test-only helper; not intended for production callers.
 */
export function __resetContainerCacheForTest(): void {
_containerCacheResult = undefined;
}
export {
isContainerEnvironment,
__resetContainerEnvironmentCacheForTest as __resetContainerCacheForTest,
};
/**
* Resolves gateway bind host with fallback strategy.

View File

@@ -0,0 +1,53 @@
import fs from "node:fs";
// Cached answer from the first probe; stays `undefined` until then.
let containerEnvironmentCache: boolean | undefined;

/**
 * Tell whether the current process looks like it is running inside a
 * container (Docker, Podman, or Kubernetes).
 *
 * Detection combines two heuristics, implemented by
 * `detectContainerEnvironment()`:
 * - Presence of common container sentinel files.
 * - Container-related entries in /proc/1/cgroup.
 *
 * The outcome is stored after the first call, so filesystem access happens
 * at most once per process lifetime (the test helper below can clear it).
 *
 * @returns `true` when any container signal is present, otherwise `false`.
 */
export function isContainerEnvironment(): boolean {
  // `??=` only assigns while the cache is still unset; a cached `false`
  // is kept as-is and does not trigger another probe.
  containerEnvironmentCache ??= detectContainerEnvironment();
  return containerEnvironmentCache;
}
/**
 * Run the uncached container probes against the local filesystem.
 *
 * Checks the sentinel files first and only reads `/proc/1/cgroup` when none
 * of them exist. All filesystem errors are treated as "signal absent".
 *
 * @returns `true` if any probe indicates a container, otherwise `false`.
 */
function detectContainerEnvironment(): boolean {
  const sentinelPaths = ["/.dockerenv", "/run/.containerenv", "/var/run/.containerenv"];
  // `some` stops at the first sentinel that is accessible.
  const hasSentinelFile = sentinelPaths.some((candidatePath) => {
    try {
      fs.accessSync(candidatePath, fs.constants.F_OK);
      return true;
    } catch {
      // Not present; try the next signal.
      return false;
    }
  });
  if (hasSentinelFile) {
    return true;
  }

  let cgroupContents: string;
  try {
    cgroupContents = fs.readFileSync("/proc/1/cgroup", "utf8");
  } catch {
    // /proc may not exist on non-Linux platforms.
    return false;
  }

  // Markers for Docker, containerd (CRI and raw), Kubernetes pods, and LXC.
  const containerCgroupPattern =
    /\/docker\/|cri-containerd-[0-9a-f]|containerd\/[0-9a-f]{64}|\/kubepods[/.]|\blxc\b/;
  return containerCgroupPattern.test(cgroupContents);
}
/**
 * Drop the cached detection result so the next `isContainerEnvironment()`
 * call performs the filesystem probes again.
 *
 * @internal test helper
 */
export function __resetContainerEnvironmentCacheForTest(): void {
containerEnvironmentCache = undefined;
}

View File

@@ -4,6 +4,7 @@ import { SUPERVISOR_HINT_ENV_VARS } from "./supervisor-markers.js";
const spawnMock = vi.hoisted(() => vi.fn());
const triggerOpenClawRestartMock = vi.hoisted(() => vi.fn());
const isContainerEnvironmentMock = vi.hoisted(() => vi.fn(() => false));
vi.mock("node:child_process", async () => {
const { mockNodeBuiltinModule } = await import("openclaw/plugin-sdk/test-node-mocks");
@@ -17,6 +18,9 @@ vi.mock("node:child_process", async () => {
vi.mock("./restart.js", () => ({
triggerOpenClawRestart: (...args: unknown[]) => triggerOpenClawRestartMock(...args),
}));
// Replace container detection with a stub so each test chooses the answer
// through isContainerEnvironmentMock (which returns false by default).
vi.mock("./container-environment.js", () => ({
isContainerEnvironment: () => isContainerEnvironmentMock(),
}));
import {
respawnGatewayProcessForUpdate,
@@ -44,6 +48,8 @@ afterEach(() => {
process.execArgv = [...originalExecArgv];
spawnMock.mockClear();
triggerOpenClawRestartMock.mockClear();
isContainerEnvironmentMock.mockReset();
isContainerEnvironmentMock.mockReturnValue(false);
if (originalPlatformDescriptor) {
Object.defineProperty(process, "platform", originalPlatformDescriptor);
}
@@ -206,6 +212,21 @@ describe("restartGatewayProcessWithFreshPid", () => {
expect(spawnMock).not.toHaveBeenCalled();
});
it("returns disabled in containers so PID 1 stays alive for in-process restart", () => {
  // Arrange: remove the explicit opt-out variable and any supervisor hints,
  // pin the platform to Linux, and make the stubbed probe report a container.
  // NOTE(review): presumably this leaves the container check as the only
  // reason to refuse a respawn — confirm against the guards that precede it.
  delete process.env.OPENCLAW_NO_RESPAWN;
  clearSupervisorHints();
  setPlatform("linux");
  isContainerEnvironmentMock.mockReturnValue(true);

  // Act + assert: the restart is reported as disabled with the container
  // detail string...
  expect(restartGatewayProcessWithFreshPid()).toEqual({
    mode: "disabled",
    detail: "container: use in-process restart to keep PID 1 alive",
  });
  // ...and no detached child process was spawned.
  expect(spawnMock).not.toHaveBeenCalled();
});
it("ignores node task script hints for gateway restart detection on Windows", () => {
clearSupervisorHints();
setPlatform("win32");

View File

@@ -1,5 +1,6 @@
import { spawn, type ChildProcess } from "node:child_process";
import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
import { isContainerEnvironment } from "./container-environment.js";
import { formatErrorMessage } from "./errors.js";
import { triggerOpenClawRestart } from "./restart.js";
import { detectRespawnSupervisor } from "./supervisor-markers.js";
@@ -66,6 +67,12 @@ export function restartGatewayProcessWithFreshPid(): GatewayRespawnResult {
detail: "win32: detached respawn unsupported without Scheduled Task markers",
};
}
if (isContainerEnvironment()) {
return {
mode: "disabled",
detail: "container: use in-process restart to keep PID 1 alive",
};
}
try {
const { pid } = spawnDetachedGatewayProcess();