mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:50:43 +00:00
fix(gateway): keep container restarts in-process
This commit is contained in:
@@ -60,6 +60,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Channels/Telegram: keep Bot API network fallbacks sticky after failed attempts and retry timed-out startup control calls once on the fallback route, so `deleteWebhook` IPv6 stalls no longer trigger slow multi-account retry storms. Fixes #73255. Thanks @ttomiczek and @sktbrd.
|
||||
- Gateway/models: merge explicit `models.providers.*.models` rows into the Gateway model catalog with normalized provider/model dedupe, and use normalized image-capability lookup so custom vision models keep native image attachments even when Pi discovery omits them or model ID casing differs. Fixes #64213 and #65165. Thanks @billonese and @202233a.
|
||||
- Gateway/reload: publish canonical post-write source config to in-process reloaders so simple config saves no longer create phantom plugin diffs or trigger unnecessary Gateway restarts. (#73267) Thanks @szsip239.
|
||||
- Gateway/Docker: keep config-triggered restarts in-process inside containers instead of spawning a detached child and exiting PID 1 cleanly, so Docker Swarm and other on-failure supervisors do not leave the service stuck at 0/1 replicas. Fixes #73178. Thanks @du-nguyen-IT007.
|
||||
- CLI/tasks: ship the task-registry control runtime in npm packages so `openclaw tasks cancel` can load ACP/subagent cancellation helpers from published builds. Fixes #68997. Thanks @1OAKDesign.
|
||||
- Channels/Telegram: preserve unsent generated media after partial reply streaming has already delivered the text, so `image_generate` outputs still reach Telegram as photos instead of being dropped from the final payload. Fixes #73253. Thanks @mlaihk.
|
||||
- Export/session: keep inline export HTML scripts and vendor libraries injected after template formatting so generated session exports open with the app code, markdown renderer, and syntax highlighter present. Fixes #41862 and #49957; carries forward #41861 and #68947. Thanks @briannewman, @martenzi, and @armanddp.
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import fs from "node:fs";
|
||||
import type { IncomingMessage } from "node:http";
|
||||
import net from "node:net";
|
||||
import type { GatewayBindMode } from "../config/types.gateway.js";
|
||||
import {
|
||||
__resetContainerEnvironmentCacheForTest,
|
||||
isContainerEnvironment,
|
||||
} from "../infra/container-environment.js";
|
||||
import {
|
||||
pickMatchingExternalInterfaceAddress,
|
||||
readNetworkInterfaces,
|
||||
@@ -228,60 +231,10 @@ export function isLocalGatewayAddress(ip: string | undefined): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Memoized detection result; `undefined` until the first probe has run. */
let _containerCacheResult: boolean | undefined;

/**
 * Report whether this process appears to be running inside a container
 * (Docker, Podman, or Kubernetes).
 *
 * Two signals are consulted, in order:
 *   1. Well-known sentinel files such as `/.dockerenv` (Docker) or
 *      `/run/.containerenv` (Podman).
 *   2. Container-related entries in `/proc/1/cgroup` (Docker, containerd,
 *      Kubernetes pods, lxc).
 *
 * The outcome is cached after the first call, so the filesystem is probed at
 * most once per process lifetime.
 *
 * @returns `true` when either signal indicates a container, otherwise `false`.
 */
export function isContainerEnvironment(): boolean {
  if (_containerCacheResult === undefined) {
    _containerCacheResult = detectContainerEnvironment();
  }
  return _containerCacheResult;
}

/**
 * Run the uncached container probe.
 *
 * Every filesystem failure is treated as "signal absent" rather than an
 * error, because the probed paths do not exist on hosts or on non-Linux
 * platforms.
 */
function detectContainerEnvironment(): boolean {
  // Signal 1: Docker/Podman sentinel files.
  const sentinelExists = (sentinelPath: string): boolean => {
    try {
      fs.accessSync(sentinelPath, fs.constants.F_OK);
    } catch {
      // Not present; let the caller move on to the next candidate.
      return false;
    }
    return true;
  };
  if (["/.dockerenv", "/run/.containerenv", "/var/run/.containerenv"].some(sentinelExists)) {
    return true;
  }

  // Signal 2: /proc/1/cgroup contains docker, containerd, kubepods, or lxc
  // markers. Covers both cgroup v1 (/docker/<id>, /kubepods/...) and cgroup v2
  // (kubepods.slice, cri-containerd-<id>.scope) path formats.
  let cgroupText: string;
  try {
    cgroupText = fs.readFileSync("/proc/1/cgroup", "utf8");
  } catch {
    // /proc may not exist (macOS, Windows), which means not a container.
    return false;
  }
  return /\/docker\/|cri-containerd-[0-9a-f]|containerd\/[0-9a-f]{64}|\/kubepods[/.]|\blxc\b/.test(
    cgroupText,
  );
}

/** @internal Test-only helper: forget the cached result so the next call probes again. */
export function __resetContainerCacheForTest(): void {
  _containerCacheResult = undefined;
}
|
||||
export {
|
||||
isContainerEnvironment,
|
||||
__resetContainerEnvironmentCacheForTest as __resetContainerCacheForTest,
|
||||
};
|
||||
|
||||
/**
|
||||
* Resolves gateway bind host with fallback strategy.
|
||||
|
||||
53
src/infra/container-environment.ts
Normal file
53
src/infra/container-environment.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import fs from "node:fs";
|
||||
|
||||
/** Sentinel files checked as the first container signal (Docker / Podman). */
const CONTAINER_SENTINEL_PATHS = [
  "/.dockerenv",
  "/run/.containerenv",
  "/var/run/.containerenv",
] as const;

/** Matches docker, containerd, kubepods, and lxc markers in `/proc/1/cgroup`. */
const CONTAINER_CGROUP_PATTERN =
  /\/docker\/|cri-containerd-[0-9a-f]|containerd\/[0-9a-f]{64}|\/kubepods[/.]|\blxc\b/;

/** Memoized detection result; `undefined` until the first probe has run. */
let containerEnvironmentCache: boolean | undefined;

/**
 * Report whether this process appears to be running inside a container
 * (Docker, Podman, or Kubernetes).
 *
 * Two signals are consulted, in order:
 * - presence of a common container sentinel file;
 * - container-related entries in /proc/1/cgroup.
 *
 * The outcome is cached after the first call, so the filesystem is probed at
 * most once per process lifetime.
 *
 * @returns `true` when either signal indicates a container, otherwise `false`.
 */
export function isContainerEnvironment(): boolean {
  if (containerEnvironmentCache === undefined) {
    containerEnvironmentCache = detectContainerEnvironment();
  }
  return containerEnvironmentCache;
}

/** Return `true` when `candidate` is reachable; any access error counts as absent. */
function pathExists(candidate: string): boolean {
  try {
    fs.accessSync(candidate, fs.constants.F_OK);
  } catch {
    // Not present; the caller tries the next signal.
    return false;
  }
  return true;
}

/** Read PID 1's cgroup listing, or `undefined` when it cannot be read. */
function readInitCgroup(): string | undefined {
  try {
    return fs.readFileSync("/proc/1/cgroup", "utf8");
  } catch {
    // /proc may not exist on non-Linux platforms.
    return undefined;
  }
}

/** Run the uncached probe: sentinel files first, then the cgroup listing. */
function detectContainerEnvironment(): boolean {
  if (CONTAINER_SENTINEL_PATHS.some(pathExists)) {
    return true;
  }
  const cgroup = readInitCgroup();
  return cgroup !== undefined && CONTAINER_CGROUP_PATTERN.test(cgroup);
}

/** @internal Test helper: forget the cached result so the next call probes again. */
export function __resetContainerEnvironmentCacheForTest(): void {
  containerEnvironmentCache = undefined;
}
|
||||
@@ -4,6 +4,7 @@ import { SUPERVISOR_HINT_ENV_VARS } from "./supervisor-markers.js";
|
||||
|
||||
// Mock handles are created through vi.hoisted so they already exist when the
// vi.mock factories in this file reference them.
const spawnMock = vi.hoisted(() => vi.fn());
|
||||
const triggerOpenClawRestartMock = vi.hoisted(() => vi.fn());
|
||||
// Stand-in for isContainerEnvironment(); returns false (not in a container)
// unless an individual test overrides the return value.
const isContainerEnvironmentMock = vi.hoisted(() => vi.fn(() => false));
|
||||
|
||||
vi.mock("node:child_process", async () => {
|
||||
const { mockNodeBuiltinModule } = await import("openclaw/plugin-sdk/test-node-mocks");
|
||||
@@ -17,6 +18,9 @@ vi.mock("node:child_process", async () => {
|
||||
// Replace ./restart.js so triggerOpenClawRestart calls are routed to the mock handle.
vi.mock("./restart.js", () => ({
|
||||
// Wrapped in an arrow function so the mock handle is looked up at call time.
triggerOpenClawRestart: (...args: unknown[]) => triggerOpenClawRestartMock(...args),
|
||||
}));
|
||||
// Replace ./container-environment.js so each test decides whether the process
// is reported as running inside a container.
vi.mock("./container-environment.js", () => ({
|
||||
isContainerEnvironment: () => isContainerEnvironmentMock(),
|
||||
}));
|
||||
|
||||
import {
|
||||
respawnGatewayProcessForUpdate,
|
||||
@@ -44,6 +48,8 @@ afterEach(() => {
|
||||
process.execArgv = [...originalExecArgv];
|
||||
spawnMock.mockClear();
|
||||
triggerOpenClawRestartMock.mockClear();
|
||||
isContainerEnvironmentMock.mockReset();
|
||||
isContainerEnvironmentMock.mockReturnValue(false);
|
||||
if (originalPlatformDescriptor) {
|
||||
Object.defineProperty(process, "platform", originalPlatformDescriptor);
|
||||
}
|
||||
@@ -206,6 +212,21 @@ describe("restartGatewayProcessWithFreshPid", () => {
|
||||
expect(spawnMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Inside a container the gateway must not spawn a detached replacement:
// the respawn helper reports "disabled" and nothing is spawned.
it("returns disabled in containers so PID 1 stays alive for in-process restart", () => {
  // Arrange: Linux, no opt-out env var, no supervisor hints, container detected.
  delete process.env.OPENCLAW_NO_RESPAWN;
  clearSupervisorHints();
  setPlatform("linux");
  isContainerEnvironmentMock.mockReturnValue(true);

  // Act
  const outcome = restartGatewayProcessWithFreshPid();

  // Assert: respawn is disabled with the container-specific detail and no child is spawned.
  const expectedOutcome = {
    mode: "disabled",
    detail: "container: use in-process restart to keep PID 1 alive",
  };
  expect(outcome).toEqual(expectedOutcome);
  expect(spawnMock).not.toHaveBeenCalled();
});
|
||||
|
||||
it("ignores node task script hints for gateway restart detection on Windows", () => {
|
||||
clearSupervisorHints();
|
||||
setPlatform("win32");
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { spawn, type ChildProcess } from "node:child_process";
|
||||
import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
|
||||
import { isContainerEnvironment } from "./container-environment.js";
|
||||
import { formatErrorMessage } from "./errors.js";
|
||||
import { triggerOpenClawRestart } from "./restart.js";
|
||||
import { detectRespawnSupervisor } from "./supervisor-markers.js";
|
||||
@@ -66,6 +67,12 @@ export function restartGatewayProcessWithFreshPid(): GatewayRespawnResult {
|
||||
detail: "win32: detached respawn unsupported without Scheduled Task markers",
|
||||
};
|
||||
}
|
||||
if (isContainerEnvironment()) {
|
||||
return {
|
||||
mode: "disabled",
|
||||
detail: "container: use in-process restart to keep PID 1 alive",
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const { pid } = spawnDetachedGatewayProcess();
|
||||
|
||||
Reference in New Issue
Block a user