fix: guard gateway mutations from older binaries

This commit is contained in:
Peter Steinberger
2026-04-26 06:07:27 +01:00
parent fbd6b3ce3c
commit 8c35e45c00
15 changed files with 457 additions and 15 deletions

View File

@@ -74,6 +74,10 @@ Docs: https://docs.openclaw.ai
- Logging: redact configured secret patterns at console and file-log sink exits
so credentials that reach the logger are masked before terminal display or
JSONL persistence. Fixes #67953. Thanks @Ziy1-Tan.
- Gateway/services: refuse process and service mutations from an older OpenClaw
binary when the config was last written by a newer version, preventing
split-brain installs from stopping or rewriting newer gateway services. Fixes
#57079.
- Agents/groups: treat clean empty assistant stops as silent `NO_REPLY` only for always-on groups where silent replies are allowed, while keeping direct and mention-gated sessions on the incomplete-turn retry path. Thanks @MagnaAI.
- macOS/Node: keep native remote app nodes from advertising `browser.proxy`,
start browser-capable CLI node services through the restored

View File

@@ -584,6 +584,9 @@ Notes:
- If token auth requires a token and the configured token SecretRef is unresolved, doctor blocks the install/repair path with actionable guidance.
- If both `gateway.auth.token` and `gateway.auth.password` are configured and `gateway.auth.mode` is unset, doctor blocks install/repair until mode is set explicitly.
- For Linux user-systemd units, doctor token drift checks now include both `Environment=` and `EnvironmentFile=` sources when comparing service auth metadata.
- Doctor service repairs refuse to rewrite, stop, or restart a gateway service
from an older OpenClaw binary when the config was last written by a newer
version. See [Gateway troubleshooting](/gateway/troubleshooting#split-brain-installs-and-newer-config-guard).
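- You can force a full rewrite via `openclaw gateway install --force`; when run from an older binary, this is also refused by the newer-config guard described in the previous bullet.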
### 16) Gateway runtime + port diagnostics

View File

@@ -30,6 +30,42 @@ Expected healthy signals:
- `openclaw channels status --probe` shows live per-account transport status and,
where supported, probe/audit results such as `works` or `audit ok`.
## Split-brain installs and newer config guard
Use this when a gateway service unexpectedly stops after an update, or logs show
that one `openclaw` binary is older than the version that last wrote
`openclaw.json`.
OpenClaw stamps config writes with `meta.lastTouchedVersion`. Read-only commands
can still inspect a config written by a newer OpenClaw, but process and service
mutations refuse to continue from an older binary. Blocked actions include
gateway service install (with or without `--force`), start, stop, restart,
uninstall, service-mode gateway startup, and `gateway --force` port cleanup.
```bash
which openclaw
openclaw --version
openclaw gateway status --deep
openclaw config get meta.lastTouchedVersion
```
Fix options:
1. Fix `PATH` so `openclaw` resolves to the newer install, then rerun the action.
2. Reinstall the intended gateway service from the newer install:
```bash
openclaw gateway install --force
openclaw gateway restart
```
3. Remove stale system package or old wrapper entries that still point at an old
`openclaw` binary.
For intentional downgrade or emergency recovery only, set
`OPENCLAW_ALLOW_OLDER_BINARY_DESTRUCTIVE_ACTIONS=1` for the single command.
Leave it unset for normal operation.
## Anthropic 429 extra usage required for long context
Use this when logs/errors include:

View File

@@ -107,6 +107,32 @@ describe("runDaemonInstall integration", () => {
expect(joined).toContain("MISSING_GATEWAY_TOKEN");
});
// Integration check for the newer-config guard: write a real config file whose
// meta.lastTouchedVersion is far ahead of any real release ("9999.1.1"), then
// verify a forced JSON-mode install is refused before the service is touched.
it("refuses service install when config was written by a newer OpenClaw", async () => {
await fs.writeFile(
configPath,
JSON.stringify(
{
meta: {
lastTouchedVersion: "9999.1.1",
},
gateway: {
auth: {
mode: "token",
},
},
},
null,
2,
),
);
// Drop any cached config so the install path re-reads the file written above.
clearConfigCache();
// force: true must not bypass the guard.
await expect(runDaemonInstall({ json: true, force: true })).rejects.toThrow("__exit__:1");
expect(serviceMock.install).not.toHaveBeenCalled();
expect(runtimeLogs.join("\n")).toContain("Refusing to install or rewrite the gateway service");
});
it("auto-mints token when no source exists without embedding it into service env", async () => {
await fs.writeFile(
configPath,

View File

@@ -325,6 +325,23 @@ describe("runDaemonInstall", () => {
expect(installDaemonServiceAndEmitMock).not.toHaveBeenCalled();
});
// Unit-level check with a mocked snapshot: both `config` and `sourceConfig`
// carry a far-future lastTouchedVersion, so a forced install must record the
// refusal and stop before planning or installing anything.
it("blocks install from an older binary when config was written by a newer one", async () => {
readConfigFileSnapshotMock.mockResolvedValue({
exists: true,
valid: true,
config: { meta: { lastTouchedVersion: "9999.1.1" } },
sourceConfig: { meta: { lastTouchedVersion: "9999.1.1" } },
});
await runDaemonInstall({ json: true, force: true });
expect(actionState.failed[0]?.message).toContain(
"Refusing to install or rewrite the gateway service",
);
// Neither the install plan nor the actual install step may run.
expect(buildGatewayInstallPlanMock).not.toHaveBeenCalled();
expect(installDaemonServiceAndEmitMock).not.toHaveBeenCalled();
});
it("returns already-installed when the service already has the expected TLS env", async () => {
service.isLoaded.mockResolvedValue(true);
resolveNodeStartupTlsEnvironmentMock.mockReturnValue({

View File

@@ -5,6 +5,7 @@ import {
isGatewayDaemonRuntime,
} from "../../commands/daemon-runtime.js";
import { resolveGatewayInstallToken } from "../../commands/gateway-install-token.js";
import { resolveFutureConfigActionBlock } from "../../config/future-version-guard.js";
import { readConfigFileSnapshotForWrite } from "../../config/io.js";
import { resolveGatewayPort } from "../../config/paths.js";
import { resolveGatewayService } from "../../daemon/service.js";
@@ -69,6 +70,14 @@ export async function runDaemonInstall(opts: DaemonInstallOptions) {
const { snapshot: configSnapshot, writeOptions: configWriteOptions } =
await readConfigFileSnapshotForWrite();
const futureBlock = resolveFutureConfigActionBlock({
action: "install or rewrite the gateway service",
snapshot: configSnapshot,
});
if (futureBlock) {
fail(`Gateway install blocked: ${futureBlock.message}`, futureBlock.hints);
return;
}
const cfg = configSnapshot.valid ? configSnapshot.sourceConfig : configSnapshot.config;
const portOverride = parsePort(opts.port);
if (opts.port !== undefined && portOverride === null) {

View File

@@ -31,11 +31,16 @@ function setConfigSnapshot(params: {
exists: boolean;
valid: boolean;
issues?: Array<{ path: string; message: string }>;
lastTouchedVersion?: string;
}) {
const config = params.lastTouchedVersion
? { meta: { lastTouchedVersion: params.lastTouchedVersion } }
: {};
readConfigFileSnapshotMock.mockResolvedValue({
exists: params.exists,
valid: params.valid,
config: {},
config,
sourceConfig: config,
issues: params.issues ?? [],
});
}
@@ -78,6 +83,19 @@ describe("runServiceRestart config pre-flight (#35862)", () => {
expect(service.restart).not.toHaveBeenCalled();
});
// A valid config stamped with a far-future lastTouchedVersion must abort the
// restart pre-flight: the service is never restarted and the JSON output
// carries the "Refusing to restart" error.
it("blocks restart from an older binary when config was written by a newer one", async () => {
setConfigSnapshot({ exists: true, valid: true, lastTouchedVersion: "9999.1.1" });
await expect(runServiceRestart(createServiceRunArgs())).rejects.toThrow("__exit__:1");
expect(service.restart).not.toHaveBeenCalled();
expect(defaultRuntime.writeJson).toHaveBeenCalledWith(
expect.objectContaining({
error: expect.stringContaining("Refusing to restart the gateway service"),
}),
);
});
it("proceeds with restart when config is valid", async () => {
setConfigSnapshot({ exists: true, valid: true });
@@ -162,3 +180,37 @@ describe("runServiceStart config pre-flight (#35862)", () => {
expect(service.restart).toHaveBeenCalledTimes(1);
});
});
// Covers the newer-config guard on the stop path of lifecycle-core.
describe("runServiceStop future-config guard", () => {
let runServiceStop: typeof import("./lifecycle-core.js").runServiceStop;
// Import the module under test dynamically and keep a reference for the suite.
beforeAll(async () => {
({ runServiceStop } = await import("./lifecycle-core.js"));
});
// Start every test from a valid, unstamped config and freshly reset mocks.
beforeEach(() => {
resetLifecycleRuntimeLogs();
readConfigFileSnapshotMock.mockReset();
setConfigSnapshot({ exists: true, valid: true });
resetLifecycleServiceMocks();
});
it("blocks stop from an older binary when config was written by a newer one", async () => {
// Override the default snapshot with one stamped by a far-future version.
setConfigSnapshot({ exists: true, valid: true, lastTouchedVersion: "9999.1.1" });
await expect(
runServiceStop({
serviceNoun: "Gateway",
service,
opts: { json: true },
}),
).rejects.toThrow("__exit__:1");
// The service must not be stopped, and the JSON payload must explain why.
expect(service.stop).not.toHaveBeenCalled();
expect(defaultRuntime.writeJson).toHaveBeenCalledWith(
expect.objectContaining({
error: expect.stringContaining("Refusing to stop the gateway service"),
}),
);
});
});

View File

@@ -1,5 +1,6 @@
import type { Writable } from "node:stream";
import { readBestEffortConfig, readConfigFileSnapshot } from "../../config/config.js";
import { resolveFutureConfigActionBlock } from "../../config/future-version-guard.js";
import { formatConfigIssueLines } from "../../config/issue-format.js";
import { resolveIsNixMode } from "../../config/paths.js";
import { checkTokenDrift } from "../../daemon/service-audit.js";
@@ -120,18 +121,38 @@ async function resolveServiceLoadedOrFail(params: {
* may produce false positives, but the check is intentionally best-effort —
* a false positive here is safer than a crash on startup. (#35862)
*/
async function getConfigValidationError(): Promise<string | null> {
type ConfigActionPreflightFailure = {
message: string;
hints?: string[];
};
async function getConfigActionPreflightFailure(
action: string,
): Promise<ConfigActionPreflightFailure | null> {
let snapshot: Awaited<ReturnType<typeof readConfigFileSnapshot>>;
try {
const snapshot = await readConfigFileSnapshot();
if (!snapshot.exists || snapshot.valid) {
return null;
snapshot = await readConfigFileSnapshot();
if (snapshot.exists && !snapshot.valid) {
return {
message:
snapshot.issues.length > 0
? formatConfigIssueLines(snapshot.issues, "", { normalizeRoot: true }).join("\n")
: "Unknown validation issue.",
};
}
return snapshot.issues.length > 0
? formatConfigIssueLines(snapshot.issues, "", { normalizeRoot: true }).join("\n")
: "Unknown validation issue.";
} catch {
return null;
}
const futureBlock = resolveFutureConfigActionBlock({ action, snapshot });
if (futureBlock) {
return {
message: futureBlock.message,
hints: futureBlock.hints,
};
}
return null;
}
export async function runServiceUninstall(params: {
@@ -149,6 +170,14 @@ export async function runServiceUninstall(params: {
return;
}
{
const preflight = await getConfigActionPreflightFailure("uninstall the gateway service");
if (preflight) {
fail(`${params.serviceNoun} uninstall blocked: ${preflight.message}`, preflight.hints);
return;
}
}
let loaded = false;
try {
loaded = await params.service.isLoaded({ env: process.env });
@@ -207,10 +236,13 @@ export async function runServiceStart(params: {
// Pre-flight config validation (#35862) — run for both loaded and not-loaded
// to prevent launching from invalid config in any start path.
{
const configError = await getConfigValidationError();
if (configError) {
const preflight = await getConfigActionPreflightFailure("start the gateway service");
if (preflight) {
fail(
`${params.serviceNoun} aborted: config is invalid.\n${configError}\nFix the config and retry, or run "openclaw doctor" to repair.`,
preflight.hints
? `${params.serviceNoun} start blocked: ${preflight.message}`
: `${params.serviceNoun} aborted: config is invalid.\n${preflight.message}\nFix the config and retry, or run "openclaw doctor" to repair.`,
preflight.hints,
);
return;
}
@@ -295,6 +327,13 @@ export async function runServiceStop(params: {
if (loaded === null) {
return;
}
{
const preflight = await getConfigActionPreflightFailure("stop the gateway service");
if (preflight) {
fail(`${params.serviceNoun} stop blocked: ${preflight.message}`, preflight.hints);
return;
}
}
if (!loaded) {
try {
const handled = await params.onNotLoaded?.({ json, stdout, fail });
@@ -390,10 +429,13 @@ export async function runServiceRestart(params: {
// Pre-flight config validation: check before any restart action (including
// onNotLoaded which may send SIGUSR1 to an unmanaged process). (#35862)
{
const configError = await getConfigValidationError();
if (configError) {
const preflight = await getConfigActionPreflightFailure("restart the gateway service");
if (preflight) {
fail(
`${params.serviceNoun} aborted: config is invalid.\n${configError}\nFix the config and retry, or run "openclaw doctor" to repair.`,
preflight.hints
? `${params.serviceNoun} restart blocked: ${preflight.message}`
: `${params.serviceNoun} aborted: config is invalid.\n${preflight.message}\nFix the config and retry, or run "openclaw doctor" to repair.`,
preflight.hints,
);
return false;
}

View File

@@ -40,6 +40,9 @@ export function resetLifecycleRuntimeLogs() {
export function resetLifecycleServiceMocks() {
service.stage.mockClear();
service.install.mockClear();
service.uninstall.mockClear();
service.stop.mockClear();
service.isLoaded.mockClear();
service.readCommand.mockClear();
service.readRuntime.mockClear();
@@ -47,6 +50,8 @@ export function resetLifecycleServiceMocks() {
service.isLoaded.mockResolvedValue(true);
service.readCommand.mockResolvedValue({ programArguments: [], environment: {} });
service.readRuntime.mockResolvedValue({ status: "running" });
service.stop.mockResolvedValue(undefined);
service.uninstall.mockResolvedValue(undefined);
service.restart.mockResolvedValue({ outcome: "completed" });
}

View File

@@ -231,6 +231,49 @@ describe("gateway run option collisions", () => {
);
});
// `gateway run --force` would kill listeners on the gateway port; with a config
// stamped by a far-future version the command must fail before any port
// cleanup or server start happens.
it("blocks --force port cleanup from an older binary with newer config", async () => {
configState.snapshot = {
exists: true,
valid: true,
config: { meta: { lastTouchedVersion: "9999.1.1" } },
sourceConfig: { meta: { lastTouchedVersion: "9999.1.1" } },
};
await expect(
runGatewayCli(["gateway", "run", "--allow-unconfigured", "--force"]),
).rejects.toThrow("__exit__:1");
expect(forceFreePortAndWait).not.toHaveBeenCalled();
expect(startGatewayServer).not.toHaveBeenCalled();
expect(runtimeErrors.join("\n")).toContain("Refusing to force-kill gateway port listeners");
});
// With OPENCLAW_SERVICE_MARKER set, `gateway run` counts as a service-mode
// start; a config stamped by a far-future version must make it fail with the
// `__exit__:78` rejection instead of starting the server.
it("blocks service-mode startup from an older binary with newer config", async () => {
configState.snapshot = {
exists: true,
valid: true,
config: { meta: { lastTouchedVersion: "9999.1.1" } },
sourceConfig: { meta: { lastTouchedVersion: "9999.1.1" } },
};
// Set the marker only for this test and restore the prior value afterwards
// so other tests see an unchanged environment.
const previousMarker = process.env.OPENCLAW_SERVICE_MARKER;
process.env.OPENCLAW_SERVICE_MARKER = "gateway";
try {
await expect(runGatewayCli(["gateway", "run", "--allow-unconfigured"])).rejects.toThrow(
"__exit__:78",
);
} finally {
if (previousMarker === undefined) {
delete process.env.OPENCLAW_SERVICE_MARKER;
} else {
process.env.OPENCLAW_SERVICE_MARKER = previousMarker;
}
}
expect(forceFreePortAndWait).not.toHaveBeenCalled();
expect(startGatewayServer).not.toHaveBeenCalled();
expect(runtimeErrors.join("\n")).toContain("Refusing to start the gateway service");
});
it.each([
["--cli-backend-logs", "generic flag"],
["--claude-cli-logs", "deprecated alias"],

View File

@@ -17,6 +17,10 @@ import {
resolveStateDir,
resolveGatewayPort,
} from "../../config/config.js";
import {
formatFutureConfigActionBlock,
resolveFutureConfigActionBlock,
} from "../../config/future-version-guard.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { hasConfiguredSecretInput } from "../../config/types.secrets.js";
import { resolveGatewayAuth } from "../../gateway/auth.js";
@@ -424,6 +428,26 @@ async function runGatewayCommand(opts: GatewayRunOpts) {
defaultRuntime.error("Invalid port");
defaultRuntime.exit(1);
}
const futureStartupBlock = resolveFutureConfigActionBlock({
action: "start the gateway service",
snapshot,
});
if (futureStartupBlock && process.env.OPENCLAW_SERVICE_MARKER?.trim()) {
defaultRuntime.error(formatFutureConfigActionBlock(futureStartupBlock));
defaultRuntime.exit(78);
return;
}
const futureForceBlock = opts.force
? resolveFutureConfigActionBlock({
action: "force-kill gateway port listeners",
snapshot,
})
: null;
if (futureForceBlock) {
defaultRuntime.error(formatFutureConfigActionBlock(futureForceBlock));
defaultRuntime.exit(1);
return;
}
// Only capture the *explicit* bind value here. The container-aware
// default is deferred until after Tailscale mode is known (see below)
// so that Tailscale's loopback constraint is respected.

View File

@@ -0,0 +1,64 @@
import { describe, expect, it } from "vitest";
import {
ALLOW_OLDER_BINARY_DESTRUCTIVE_ACTIONS_ENV,
formatFutureConfigActionBlock,
resolveFutureConfigActionBlock,
} from "./future-version-guard.js";
import type { ConfigFileSnapshot } from "./types.js";
/**
 * Test fixture: builds the minimal snapshot slice the guard reads.
 *
 * Only `sourceConfig` carries `meta.lastTouchedVersion`; the resolved `config`
 * is intentionally left empty.
 *
 * @param version - Value to place in `sourceConfig.meta.lastTouchedVersion`.
 * @returns An object with `sourceConfig` and `config` entries.
 */
function snapshotWithTouchedVersion(
  version: string,
): Pick<ConfigFileSnapshot, "config" | "sourceConfig"> {
  const stampedSource = {
    meta: { lastTouchedVersion: version },
  } as ConfigFileSnapshot["sourceConfig"];
  const emptyConfig = {} as ConfigFileSnapshot["config"];
  return { sourceConfig: stampedSource, config: emptyConfig };
}
// Unit tests for the newer-config guard. Every case passes an explicit `env`
// so the result does not depend on the real process environment.
describe("resolveFutureConfigActionBlock", () => {
it("blocks destructive actions from older binaries", () => {
// Binary 2026.4.5 vs. config last written by 2026.4.23: must be blocked.
const block = resolveFutureConfigActionBlock({
action: "restart the gateway service",
currentVersion: "2026.4.5",
snapshot: snapshotWithTouchedVersion("2026.4.23"),
env: {},
});
// The message names the refused action and both versions involved.
expect(block?.message).toContain("Refusing to restart the gateway service");
expect(block?.message).toContain("2026.4.5");
expect(block?.message).toContain("2026.4.23");
// The formatted output (message + hints) must mention the override variable.
expect(formatFutureConfigActionBlock(block!)).toContain(
ALLOW_OLDER_BINARY_DESTRUCTIVE_ACTIONS_ENV,
);
});
it("allows same stable family and older configs", () => {
// Same version on both sides: no block.
expect(
resolveFutureConfigActionBlock({
action: "restart the gateway service",
currentVersion: "2026.4.23",
snapshot: snapshotWithTouchedVersion("2026.4.23"),
env: {},
}),
).toBeNull();
// Config written by an older version than the running binary: no block.
expect(
resolveFutureConfigActionBlock({
action: "restart the gateway service",
currentVersion: "2026.4.23",
snapshot: snapshotWithTouchedVersion("2026.4.5"),
env: {},
}),
).toBeNull();
});
it("allows intentional downgrade override through env", () => {
// Same inputs as the blocking case, but with the override variable set to "1".
expect(
resolveFutureConfigActionBlock({
action: "restart the gateway service",
currentVersion: "2026.4.5",
snapshot: snapshotWithTouchedVersion("2026.4.23"),
env: { [ALLOW_OLDER_BINARY_DESTRUCTIVE_ACTIONS_ENV]: "1" },
}),
).toBeNull();
});
});

View File

@@ -0,0 +1,66 @@
import { VERSION } from "../version.js";
import type { ConfigFileSnapshot, OpenClawConfig } from "./types.js";
import { shouldWarnOnTouchedVersion } from "./version.js";
/**
 * Name of the environment variable that bypasses the newer-config guard.
 * The guard accepts the trimmed, case-insensitive values `1`, `true`, and `yes`.
 */
export const ALLOW_OLDER_BINARY_DESTRUCTIVE_ACTIONS_ENV =
"OPENCLAW_ALLOW_OLDER_BINARY_DESTRUCTIVE_ACTIONS";
/**
 * Describes why an action was refused because the config was last written by
 * a newer OpenClaw than the running binary.
 */
export type FutureConfigActionBlock = {
/** Human-readable action phrase that was refused (e.g. "restart the gateway service"). */
action: string;
/** Version of the running binary that was compared. */
currentVersion: string;
/** `meta.lastTouchedVersion` value read from the config. */
touchedVersion: string;
/** Single-line refusal message naming the action and both versions. */
message: string;
/** Follow-up guidance lines shown after the message. */
hints: string[];
};
/** Inputs accepted by `resolveFutureConfigActionBlock`. */
type FutureConfigGuardParams = {
/** Action phrase interpolated into the refusal message. */
action: string;
/** Config snapshot; `sourceConfig` is consulted before `config`. */
snapshot?: Pick<ConfigFileSnapshot, "config" | "sourceConfig"> | null;
/** Fallback config, consulted only when the snapshot yields no version. */
config?: Pick<OpenClawConfig, "meta"> | null;
/** Version to compare against; defaults to the build's `VERSION`. */
currentVersion?: string;
/** Environment to read the override from; defaults to `process.env`. */
env?: Record<string, string | undefined>;
};
/**
 * Reports whether the override environment variable opts in to running
 * destructive actions from an older binary.
 *
 * @param env - Environment map to inspect.
 * @returns `true` when the trimmed, lower-cased value is `1`, `true`, or `yes`.
 */
function allowOlderBinaryDestructiveActions(env: Record<string, string | undefined>): boolean {
  const normalized = env[ALLOW_OLDER_BINARY_DESTRUCTIVE_ACTIONS_ENV]?.trim().toLowerCase();
  switch (normalized) {
    case "1":
    case "true":
    case "yes":
      return true;
    default:
      // Unset, empty, or any other value keeps the guard active.
      return false;
  }
}
/**
 * Picks the first non-blank `meta.lastTouchedVersion` from, in order:
 * the snapshot's `sourceConfig`, the snapshot's `config`, then `params.config`.
 *
 * @param params - Guard inputs carrying the snapshot and/or config.
 * @returns The trimmed version string, or `null` when none is present.
 */
function resolveTouchedVersion(params: FutureConfigGuardParams): string | null {
  const candidates = [
    params.snapshot?.sourceConfig?.meta?.lastTouchedVersion,
    params.snapshot?.config?.meta?.lastTouchedVersion,
    params.config?.meta?.lastTouchedVersion,
  ];
  for (const candidate of candidates) {
    const trimmed = candidate?.trim();
    // Whitespace-only values count as missing, so fall through to the next source.
    if (trimmed) {
      return trimmed;
    }
  }
  return null;
}
/**
 * Decides whether an action must be refused because the config was last
 * written by a newer OpenClaw than the running binary.
 *
 * @param params - Action phrase plus optional snapshot/config, version, and env.
 *   `env` defaults to `process.env` and `currentVersion` to the build `VERSION`.
 * @returns A block descriptor (message + hints) when the action is refused, or
 *   `null` when the override is set, no touched version is recorded, or
 *   `shouldWarnOnTouchedVersion` does not flag the recorded version.
 */
export function resolveFutureConfigActionBlock(
  params: FutureConfigGuardParams,
): FutureConfigActionBlock | null {
  // The explicit override short-circuits everything else.
  if (allowOlderBinaryDestructiveActions(params.env ?? process.env)) {
    return null;
  }

  const currentVersion = params.currentVersion ?? VERSION;
  const touchedVersion = resolveTouchedVersion(params);
  if (!touchedVersion) {
    return null;
  }
  if (!shouldWarnOnTouchedVersion(currentVersion, touchedVersion)) {
    return null;
  }

  const { action } = params;
  const message = `Refusing to ${action} because this OpenClaw binary (${currentVersion}) is older than the config last written by OpenClaw ${touchedVersion}.`;
  const hints = [
    "Run the newer openclaw binary on PATH, or reinstall the intended gateway service from the newer install.",
    `Set ${ALLOW_OLDER_BINARY_DESTRUCTIVE_ACTIONS_ENV}=1 only for an intentional downgrade or recovery action.`,
  ];
  return { action, currentVersion, touchedVersion, message, hints };
}
/**
 * Renders a block descriptor as plain text: the message on the first line,
 * followed by each hint on its own line.
 *
 * @param block - Descriptor produced by the guard.
 * @returns Newline-joined message and hints (no trailing newline).
 */
export function formatFutureConfigActionBlock(block: FutureConfigActionBlock): string {
  const lines: string[] = [block.message];
  for (const hint of block.hints) {
    lines.push(hint);
  }
  return lines.join("\n");
}

View File

@@ -0,0 +1,24 @@
import { readConfigFileSnapshot } from "../config/config.js";
import {
formatFutureConfigActionBlock,
resolveFutureConfigActionBlock,
type FutureConfigActionBlock,
} from "../config/future-version-guard.js";
/**
 * Reads the current config snapshot and evaluates the newer-config guard for
 * the given action.
 *
 * Best-effort: if reading the snapshot or evaluating the guard throws, the
 * error is swallowed and the action is treated as not blocked.
 *
 * @param action - Action phrase used in the refusal message.
 * @returns The block descriptor, or `null` when not blocked or on any error.
 */
export async function readFutureConfigActionBlock(
  action: string,
): Promise<FutureConfigActionBlock | null> {
  let block: FutureConfigActionBlock | null = null;
  try {
    block = resolveFutureConfigActionBlock({
      action,
      snapshot: await readConfigFileSnapshot(),
    });
  } catch {
    // Deliberately fail open: an unreadable config must not block the action.
    block = null;
  }
  return block;
}
/**
 * Throws when the newer-config guard refuses the given action.
 *
 * @param action - Action phrase used in the refusal message.
 * @throws Error whose message is the formatted block (message plus hints).
 */
export async function assertFutureConfigActionAllowed(action: string): Promise<void> {
  const block = await readFutureConfigActionBlock(action);
  if (!block) {
    return;
  }
  throw new Error(formatFutureConfigActionBlock(block));
}

View File

@@ -1,4 +1,5 @@
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import { assertFutureConfigActionAllowed } from "./future-config-guard.js";
import {
installLaunchAgent,
isLaunchAgentLoaded,
@@ -211,6 +212,32 @@ const GATEWAY_SERVICE_REGISTRY: Record<SupportedGatewayServicePlatform, GatewayS
},
};
/**
 * Wraps a gateway service so every mutating operation (stage, install,
 * uninstall, stop, restart) first runs the newer-config guard.
 *
 * All other members of `service` are copied through unchanged. Each wrapped
 * method rejects with the guard's error before delegating when the action is
 * refused.
 *
 * @param service - Platform service implementation to wrap.
 * @returns A service whose mutating methods are guarded.
 */
function withFutureConfigGuard(service: GatewayService): GatewayService {
  const guardedMutations: Pick<
    GatewayService,
    "stage" | "install" | "uninstall" | "stop" | "restart"
  > = {
    async stage(args) {
      await assertFutureConfigActionAllowed("rewrite the gateway service");
      return await service.stage(args);
    },
    async install(args) {
      await assertFutureConfigActionAllowed("install or rewrite the gateway service");
      return await service.install(args);
    },
    async uninstall(args) {
      await assertFutureConfigActionAllowed("uninstall the gateway service");
      return await service.uninstall(args);
    },
    async stop(args) {
      await assertFutureConfigActionAllowed("stop the gateway service");
      return await service.stop(args);
    },
    async restart(args) {
      await assertFutureConfigActionAllowed("restart the gateway service");
      return await service.restart(args);
    },
  };
  // Guarded methods are spread last so they override the originals.
  return { ...service, ...guardedMutations };
}
function isSupportedGatewayServicePlatform(
platform: NodeJS.Platform,
): platform is SupportedGatewayServicePlatform {
@@ -219,7 +246,7 @@ function isSupportedGatewayServicePlatform(
export function resolveGatewayService(): GatewayService {
if (isSupportedGatewayServicePlatform(process.platform)) {
return GATEWAY_SERVICE_REGISTRY[process.platform];
return withFutureConfigGuard(GATEWAY_SERVICE_REGISTRY[process.platform]);
}
throw new Error(`Gateway service install not supported on ${process.platform}`);
}