fix(update): authenticate restart health probes

This commit is contained in:
Vincent Koc
2026-05-04 20:14:02 -07:00
parent a373468d82
commit b546aa91e1
3 changed files with 128 additions and 8 deletions

View File

@@ -72,6 +72,7 @@ Docs: https://docs.openclaw.ai
- Telegram/media: derive no-caption inbound media placeholders from saved MIME metadata instead of the Telegram `photo` shape, so non-image and mixed attachments no longer reach the model as `<media:image>`. Fixes #69793. Thanks @aspalagin.
- Agents/cache: keep per-turn runtime context out of ordinary chat system prompts while still delivering hidden current-turn context, restoring prompt-cache reuse on chat continuations. Fixes #77431. Thanks @Udjin79.
- Gateway/startup: include resolved thinking and fast-mode defaults in the `agent model` startup log line, defaulting unset startup thinking to `medium` without mixing in reasoning visibility.
- Gateway/update: resolve local gateway probe auth from the installed config during post-update restart verification, so token/device-authenticated VPS gateways are not misreported as unhealthy port conflicts after a package swap. Thanks @vincentkoc.
- Agents/Tools: add post-compaction loop guard in `pi-embedded-runner` that arms after auto-compaction-retry and aborts the run with `compaction_loop_persisted` when the agent emits the same `(tool, args, result)` triple `windowSize` times (default 3) within that window. Disable via existing `tools.loopDetection.enabled`; tune via `tools.loopDetection.postCompactionGuard.windowSize`. Targets the failure mode where context-overflow + compaction does not break a tool-call loop. Refs #77474; carries forward #21597. Thanks @efpiva.
- Gateway/watch: suppress sync-I/O trace output during `pnpm gateway:watch --benchmark` unless explicitly requested, so CPU profiling no longer floods the terminal with stack traces.
- Gateway/watch: when benchmark sync-I/O tracing is explicitly enabled, tee trace blocks to the benchmark output log and filter them from the terminal pane while keeping normal Gateway logs visible.

View File

@@ -8,6 +8,12 @@ const classifyPortListener = vi.hoisted(() =>
vi.fn<(_listener: unknown, _port: number) => PortListenerKind>(() => "gateway"),
);
const probeGateway = vi.hoisted(() => vi.fn());
// Hoisted stub standing in for the config reader; by default it resolves to an empty config object.
const readBestEffortConfig = vi.hoisted(() => vi.fn(async () => ({})));
// Hoisted stub standing in for probe-auth resolution; by default it resolves to an
// empty `auth` object (no token, no password).
const resolveGatewayProbeAuthSafeWithSecretInputs = vi.hoisted(() =>
vi.fn<(_opts: unknown) => Promise<{ auth: { token?: string; password?: string } }>>(async () => ({
auth: {},
})),
);
vi.mock("../../infra/ports.js", () => ({
classifyPortListener: (listener: unknown, port: number) => classifyPortListener(listener, port),
@@ -19,6 +25,17 @@ vi.mock("../../gateway/probe.js", () => ({
probeGateway: (opts: unknown) => probeGateway(opts),
}));
// Replace the config IO module so that createConfigIO().readBestEffortConfig()
// delegates to the hoisted stub, letting each test choose the config that is "read".
vi.mock("../../config/io.js", () => ({
createConfigIO: () => ({
readBestEffortConfig: () => readBestEffortConfig(),
}),
}));
// Replace the probe-auth module; the wrapper forwards its options to the hoisted
// stub so tests can both control the resolved auth and assert on the arguments.
vi.mock("../../gateway/probe-auth.js", () => ({
resolveGatewayProbeAuthSafeWithSecretInputs: (opts: unknown) =>
resolveGatewayProbeAuthSafeWithSecretInputs(opts),
}));
vi.mock("../../utils.js", async () => {
const actual = await vi.importActual<typeof import("../../utils.js")>("../../utils.js");
return {
@@ -112,6 +129,10 @@ async function waitForStoppedFreeGatewayRestart() {
describe("inspectGatewayRestart", () => {
beforeEach(() => {
inspectPortUsage.mockReset();
readBestEffortConfig.mockReset();
readBestEffortConfig.mockResolvedValue({});
resolveGatewayProbeAuthSafeWithSecretInputs.mockReset();
resolveGatewayProbeAuthSafeWithSecretInputs.mockResolvedValue({ auth: {} });
inspectPortUsage.mockResolvedValue({
port: 0,
status: "free",
@@ -380,6 +401,52 @@ describe("inspectGatewayRestart", () => {
expect(snapshot.versionMismatch).toBeUndefined();
});
// Verifies that waitForGatewayHealthyRestart resolves probe credentials from the
// config it reads and hands them to probeGateway, rather than probing unauthenticated.
it("uses configured local probe auth while waiting for a matching-version restart", async () => {
// Config read returns a token-mode gateway auth section.
readBestEffortConfig.mockResolvedValue({
gateway: { auth: { mode: "token", token: "probe-token" } },
});
// Probe-auth resolution yields only a token (no password).
resolveGatewayProbeAuthSafeWithSecretInputs.mockResolvedValue({
auth: { token: "probe-token" },
});
// The probe succeeds and reports the same version the test passes as expectedVersion.
probeGateway.mockResolvedValue({
ok: true,
close: null,
server: { version: "2026.4.24", connId: "new" },
});
const service = makeGatewayService({ status: "running", pid: 8000 });
// Port is busy, and the listener pid matches the pid given to the service fixture.
inspectPortUsage.mockResolvedValue({
port: 18789,
status: "busy",
listeners: [{ pid: 8000, commandLine: "openclaw-gateway" }],
hints: [],
});
// Import after the mocks above are configured.
const { waitForGatewayHealthyRestart } = await import("./restart-health.js");
const snapshot = await waitForGatewayHealthyRestart({
service,
port: 18789,
expectedVersion: "2026.4.24",
attempts: 1,
});
expect(snapshot).toMatchObject({
healthy: true,
gatewayVersion: "2026.4.24",
expectedVersion: "2026.4.24",
});
// The resolver must receive the config that was read, in "local" mode.
expect(resolveGatewayProbeAuthSafeWithSecretInputs).toHaveBeenCalledWith(
expect.objectContaining({
cfg: { gateway: { auth: { mode: "token", token: "probe-token" } } },
mode: "local",
}),
);
// The probe must carry the resolved token; password stays undefined because none was resolved.
expect(probeGateway).toHaveBeenCalledWith(
expect.objectContaining({
auth: { token: "probe-token", password: undefined },
}),
);
});
it("stops waiting once the restarted gateway reports the wrong version", async () => {
probeGateway.mockResolvedValue({
ok: true,

View File

@@ -1,6 +1,9 @@
import type { PluginHealthErrorSummary } from "../../commands/health.types.js";
import { createConfigIO } from "../../config/io.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import type { GatewayServiceRuntime } from "../../daemon/service-runtime.js";
import type { GatewayService } from "../../daemon/service.js";
import { resolveGatewayProbeAuthSafeWithSecretInputs } from "../../gateway/probe-auth.js";
import { probeGateway } from "../../gateway/probe.js";
import {
classifyPortListener,
@@ -61,6 +64,11 @@ type GatewayReachability = {
channelProbeErrors: Array<{ id: string; error: string }>;
};
/**
 * Optional credentials passed to the gateway probe during restart health checks.
 * Either field may be absent; callers fall back to other sources when one is missing.
 */
type GatewayRestartProbeAuth = {
token?: string;
password?: string;
};
function hasListenerAttributionGap(portUsage: PortUsage): boolean {
if (portUsage.status !== "busy" || portUsage.listeners.length > 0) {
return false;
@@ -228,9 +236,12 @@ function applyChannelProbeErrors(snapshot: GatewayRestartSnapshot): GatewayResta
async function confirmGatewayReachable(params: {
port: number;
includeHealthDetails?: boolean;
auth?: GatewayRestartProbeAuth;
}): Promise<GatewayReachability> {
const token = normalizeOptionalString(process.env.OPENCLAW_GATEWAY_TOKEN);
const password = normalizeOptionalString(process.env.OPENCLAW_GATEWAY_PASSWORD);
const token = normalizeOptionalString(params.auth?.token ?? process.env.OPENCLAW_GATEWAY_TOKEN);
const password = normalizeOptionalString(
params.auth?.password ?? process.env.OPENCLAW_GATEWAY_PASSWORD,
);
const probe = await probeGateway({
url: `ws://127.0.0.1:${params.port}`,
auth: token || password ? { token, password } : undefined,
@@ -251,13 +262,37 @@ async function confirmGatewayReachable(params: {
};
}
async function inspectGatewayPortHealth(port: number): Promise<GatewayPortHealthSnapshot> {
/**
 * Resolves the credentials used to probe the local gateway after a restart.
 *
 * The supplied environment is layered on top of `process.env`, the config is
 * read best-effort with plugin validation skipped (an empty config is used if
 * that read rejects), and both are handed to the probe-auth resolver in
 * `"local"` mode.
 *
 * @param env - Optional environment overrides; entries here take precedence
 *   over `process.env`.
 * @returns The `auth` value produced by the probe-auth resolver.
 */
async function resolveGatewayRestartProbeAuth(
  env: NodeJS.ProcessEnv | undefined,
): Promise<GatewayRestartProbeAuth | undefined> {
  // Later spread wins, so caller-provided entries override the process environment.
  const effectiveEnv: NodeJS.ProcessEnv = { ...process.env, ...env };
  const configIO = createConfigIO({ env: effectiveEnv, pluginValidation: "skip" });
  // Only a rejected read is swallowed; a failure while creating the IO still propagates.
  const emptyConfig = (): OpenClawConfig => ({});
  const cfg = await configIO.readBestEffortConfig().catch(emptyConfig);
  const { auth } = await resolveGatewayProbeAuthSafeWithSecretInputs({
    cfg,
    mode: "local",
    env: effectiveEnv,
  });
  return auth;
}
async function inspectGatewayPortHealth(params: {
port: number;
auth?: GatewayRestartProbeAuth;
}): Promise<GatewayPortHealthSnapshot> {
let portUsage: PortUsage;
try {
portUsage = await inspectPortUsage(port);
portUsage = await inspectPortUsage(params.port);
} catch (err) {
portUsage = {
port,
port: params.port,
status: "unknown",
listeners: [],
hints: [],
@@ -268,7 +303,12 @@ async function inspectGatewayPortHealth(port: number): Promise<GatewayPortHealth
let healthy = false;
if (portUsage.status === "busy") {
try {
healthy = (await confirmGatewayReachable({ port })).reachable;
healthy = (
await confirmGatewayReachable({
port: params.port,
auth: params.auth,
})
).reachable;
} catch {
// best-effort probe
}
@@ -283,6 +323,7 @@ export async function inspectGatewayRestart(params: {
env?: NodeJS.ProcessEnv;
expectedVersion?: string | null;
includeUnknownListenersAsStale?: boolean;
probeAuth?: GatewayRestartProbeAuth;
}): Promise<GatewayRestartSnapshot> {
const env = params.env ?? process.env;
const expectedVersion = normalizeOptionalString(params.expectedVersion);
@@ -294,6 +335,7 @@ export async function inspectGatewayRestart(params: {
reachability = await confirmGatewayReachable({
port: params.port,
includeHealthDetails: Boolean(expectedVersion),
auth: params.probeAuth,
});
activatedPluginErrors = reachability.activatedPluginErrors;
channelProbeErrors = reachability.channelProbeErrors;
@@ -477,12 +519,14 @@ export async function waitForGatewayHealthyRestart(params: {
const attempts = params.attempts ?? DEFAULT_RESTART_HEALTH_ATTEMPTS;
const delayMs = params.delayMs ?? DEFAULT_RESTART_HEALTH_DELAY_MS;
const probeAuth = await resolveGatewayRestartProbeAuth(params.env).catch(() => undefined);
let snapshot = await inspectGatewayRestart({
service: params.service,
port: params.port,
env: params.env,
expectedVersion: params.expectedVersion,
includeUnknownListenersAsStale: params.includeUnknownListenersAsStale,
probeAuth,
});
let consecutiveStoppedFreeCount = 0;
@@ -523,6 +567,7 @@ export async function waitForGatewayHealthyRestart(params: {
env: params.env,
expectedVersion: params.expectedVersion,
includeUnknownListenersAsStale: params.includeUnknownListenersAsStale,
probeAuth,
});
}
@@ -537,14 +582,21 @@ export async function waitForGatewayHealthyListener(params: {
const attempts = params.attempts ?? DEFAULT_RESTART_HEALTH_ATTEMPTS;
const delayMs = params.delayMs ?? DEFAULT_RESTART_HEALTH_DELAY_MS;
let snapshot = await inspectGatewayPortHealth(params.port);
const probeAuth = await resolveGatewayRestartProbeAuth(undefined).catch(() => undefined);
let snapshot = await inspectGatewayPortHealth({
port: params.port,
auth: probeAuth,
});
for (let attempt = 0; attempt < attempts; attempt += 1) {
if (snapshot.healthy) {
return snapshot;
}
await sleep(delayMs);
snapshot = await inspectGatewayPortHealth(params.port);
snapshot = await inspectGatewayPortHealth({
port: params.port,
auth: probeAuth,
});
}
return snapshot;