fix(gateway): harden restart acknowledgements

This commit is contained in:
Ayaan Zaidi
2026-04-22 22:47:04 +05:30
parent 95ecb0526e
commit f5173589a4
11 changed files with 119 additions and 66 deletions

View File

@@ -606,10 +606,6 @@ then `config.patch`.
- `note` (optional) — note for the restart sentinel
- `restartDelayMs` (optional) — delay before restart (default 2000)
When `sessionKey` is present and no explicit continuation is provided, the
Gateway asks that session's agent to acknowledge the successful restart
after boot.
Restart requests are coalesced while one is already pending/in-flight, and a 30-second cooldown applies between restart cycles.
```bash

View File

@@ -69,28 +69,16 @@ describe("gateway tool restart continuation", () => {
scheduleGatewaySigusr1RestartMock.mockReturnValue({ scheduled: true, delayMs: 250 });
});
it("uses a flat enum for continuationKind in the tool schema", async () => {
it("does not expose system-event continuations to the agent tool", async () => {
const { createGatewayTool } = await import("./gateway-tool.js");
const tool = createGatewayTool();
const continuationKind = (
tool.parameters as {
properties?: {
continuationKind?: {
type?: string;
enum?: string[];
anyOf?: unknown[];
};
};
}
).properties?.continuationKind;
expect(continuationKind).toEqual(
expect.objectContaining({
type: "string",
enum: ["systemEvent", "agentTurn"],
}),
);
expect(continuationKind).not.toHaveProperty("anyOf");
const parameters = tool.parameters as {
properties?: {
continuationKind?: unknown;
};
};
expect(parameters.properties?.continuationKind).toBeUndefined();
});
it("instructs agents to use continuationMessage when a restart still needs a reply", async () => {
@@ -142,6 +130,29 @@ describe("gateway tool restart continuation", () => {
expect(result?.details).toEqual({ scheduled: true, delayMs: 250 });
});
it("coerces legacy continuationKind inputs to an agentTurn", async () => {
const { createGatewayTool } = await import("./gateway-tool.js");
const tool = createGatewayTool({
agentSessionKey: "agent:main:main",
config: {},
});
await tool.execute?.("tool-call-1", {
action: "restart",
continuationKind: "systemEvent",
continuationMessage: "Reply after restart",
});
expect(writeRestartSentinelMock).toHaveBeenCalledWith(
expect.objectContaining({
continuation: {
kind: "agentTurn",
message: "Reply after restart",
},
}),
);
});
it("defaults session-scoped restarts to a success continuation", async () => {
const { createGatewayTool } = await import("./gateway-tool.js");
const { DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE } =

View File

@@ -15,7 +15,7 @@ import { scheduleGatewaySigusr1Restart } from "../../infra/restart.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { collectEnabledInsecureOrDangerousFlags } from "../../security/dangerous-config-flags.js";
import { normalizeOptionalString, readStringValue } from "../../shared/string-coerce.js";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
import { stringEnum } from "../schema/typebox.js";
import { type AnyAgentTool, jsonResult, readStringParam } from "./common.js";
import { callGatewayTool, readGatewayCallOptions } from "./gateway.js";
import { isOpenClawOwnerOnlyCoreToolName } from "./owner-only-tools.js";
@@ -290,7 +290,6 @@ const GatewayToolSchema = Type.Object({
// restart
delayMs: Type.Optional(Type.Number()),
reason: Type.Optional(Type.String()),
continuationKind: optionalStringEnum(["systemEvent", "agentTurn"] as const),
continuationMessage: Type.Optional(Type.String()),
// config.get, config.schema.lookup, config.apply, update.run
gatewayUrl: Type.Optional(Type.String()),
@@ -320,7 +319,7 @@ export function createGatewayTool(opts?: {
name: "gateway",
ownerOnly: isOpenClawOwnerOnlyCoreToolName("gateway"),
description:
"Restart, inspect a specific config schema path, apply config, or update the gateway in-place (SIGUSR1). Use config.schema.lookup with a targeted dot path before config edits. Use config.patch for safe partial config updates (merges with existing). Use config.apply only when replacing entire config. Config writes hot-reload when possible and restart when required. Always pass a human-readable completion message via the `note` parameter so the system can deliver it to the user after restart. If restarting during a user task and you still owe the user a reply, pass a specific one-shot `continuationMessage` for what to verify or report after boot; do not write restart sentinel files directly. Use `continuationKind` only when it should be a system event instead of a normal agent turn.",
"Restart, inspect a specific config schema path, apply config, or update the gateway in-place (SIGUSR1). Use config.schema.lookup with a targeted dot path before config edits. Use config.patch for safe partial config updates (merges with existing). Use config.apply only when replacing entire config. Config writes hot-reload when possible and restart when required. Always pass a human-readable completion message via the `note` parameter so the system can deliver it to the user after restart. If restarting during a user task and you still owe the user a reply, pass a specific one-shot `continuationMessage` for what to verify or report after boot; do not write restart sentinel files directly.",
parameters: GatewayToolSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
@@ -339,7 +338,6 @@ export function createGatewayTool(opts?: {
const reason = normalizeOptionalString(params.reason)?.slice(0, 200);
const note = normalizeOptionalString(params.note);
const continuationMessage = normalizeOptionalString(params.continuationMessage);
const continuationKind = normalizeOptionalString(params.continuationKind);
// Extract channel + threadId for routing after restart.
// Uses generic :thread: parsing plus plugin-owned session grammars.
const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey);
@@ -353,7 +351,6 @@ export function createGatewayTool(opts?: {
message: note ?? reason ?? null,
continuation: buildRestartSuccessContinuation({
sessionKey,
continuationKind,
continuationMessage,
}),
doctorHint: formatDoctorNonInteractiveHint(),

View File

@@ -3,6 +3,7 @@ import type { RestartSentinelPayload } from "../../infra/restart-sentinel.js";
import type { HandleCommandsParams } from "./commands-types.js";
const mocks = vi.hoisted(() => ({
unlink: vi.fn(async (_path: string) => undefined),
isRestartEnabled: vi.fn(() => true),
extractDeliveryInfo: vi.fn(() => ({
deliveryContext: {
@@ -18,6 +19,13 @@ const mocks = vi.hoisted(() => ({
triggerOpenClawRestart: vi.fn(() => ({ ok: true, method: "launchctl" })),
}));
vi.mock("node:fs/promises", () => ({
default: {
unlink: mocks.unlink,
},
unlink: mocks.unlink,
}));
vi.mock("../../config/commands.flags.js", () => ({
isRestartEnabled: mocks.isRestartEnabled,
}));
@@ -98,6 +106,7 @@ describe("handleRestartCommand", () => {
beforeEach(() => {
mocks.isRestartEnabled.mockReset();
mocks.isRestartEnabled.mockReturnValue(true);
mocks.unlink.mockClear();
mocks.extractDeliveryInfo.mockClear();
mocks.formatDoctorNonInteractiveHint.mockClear();
mocks.writeRestartSentinel.mockClear();
@@ -133,4 +142,42 @@ describe("handleRestartCommand", () => {
);
expect(mocks.triggerOpenClawRestart).toHaveBeenCalledTimes(1);
});
it("rejects authorized non-owner restart commands", async () => {
const result = await handleRestartCommand(
restartCommandParams({
command: {
...restartCommandParams().command,
senderIsOwner: false,
isAuthorizedSender: true,
},
}),
true,
);
expect(result).toEqual({ shouldContinue: false });
expect(mocks.writeRestartSentinel).not.toHaveBeenCalled();
expect(mocks.triggerOpenClawRestart).not.toHaveBeenCalled();
});
it("does not restart when the sentinel cannot be written", async () => {
mocks.writeRestartSentinel.mockRejectedValueOnce(new Error("disk full"));
const result = await handleRestartCommand(restartCommandParams(), true);
expect(result?.reply?.text).toContain("could not persist");
expect(mocks.triggerOpenClawRestart).not.toHaveBeenCalled();
});
it("removes the success sentinel when fallback restart fails", async () => {
mocks.triggerOpenClawRestart.mockReturnValueOnce({
ok: false,
method: "launchctl",
});
const result = await handleRestartCommand(restartCommandParams(), true);
expect(result?.reply?.text).toContain("Restart failed");
expect(mocks.unlink).toHaveBeenCalledWith("/tmp/sentinel.json");
});
});

View File

@@ -1,3 +1,4 @@
import fs from "node:fs/promises";
import { resolveSessionAgentId } from "../../agents/agent-scope.js";
import { resolveFastModeState } from "../../agents/fast-mode.js";
import {
@@ -41,10 +42,10 @@ const SESSION_DURATION_OFF_VALUES = new Set(["off", "disable", "disabled", "none
const SESSION_ACTION_IDLE = "idle";
const SESSION_ACTION_MAX_AGE = "max-age";
async function writeRestartCommandSentinel(params: HandleCommandsParams) {
async function writeRestartCommandSentinel(params: HandleCommandsParams): Promise<string | null> {
const sessionKey = normalizeOptionalString(params.sessionKey);
if (!sessionKey) {
return;
return null;
}
const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey);
const payload: RestartSentinelPayload = {
@@ -62,7 +63,14 @@ async function writeRestartCommandSentinel(params: HandleCommandsParams) {
reason: "/restart",
},
};
await writeRestartSentinel(payload).catch(() => {});
return await writeRestartSentinel(payload);
}
async function removeRestartCommandSentinel(filePath: string | null) {
if (!filePath) {
return;
}
await fs.unlink(filePath).catch(() => {});
}
function resolveSessionCommandUsage() {
@@ -662,6 +670,10 @@ export const handleRestartCommand: CommandHandler = async (params, allowTextComm
);
return { shouldContinue: false };
}
const nonOwner = rejectNonOwnerCommand(params, "/restart");
if (nonOwner) {
return nonOwner;
}
if (!isRestartEnabled(params.cfg)) {
return {
shouldContinue: false,
@@ -671,8 +683,19 @@ export const handleRestartCommand: CommandHandler = async (params, allowTextComm
};
}
const hasSigusr1Listener = process.listenerCount("SIGUSR1") > 0;
let sentinelPath: string | null = null;
try {
sentinelPath = await writeRestartCommandSentinel(params);
} catch (err) {
logVerbose(`failed to write /restart sentinel: ${String(err)}`);
return {
shouldContinue: false,
reply: {
text: "⚠️ Restart failed: could not persist the post-restart acknowledgement.",
},
};
}
if (hasSigusr1Listener) {
await writeRestartCommandSentinel(params);
scheduleGatewaySigusr1Restart({ reason: "/restart" });
return {
shouldContinue: false,
@@ -681,9 +704,9 @@ export const handleRestartCommand: CommandHandler = async (params, allowTextComm
},
};
}
await writeRestartCommandSentinel(params);
const restartMethod = triggerOpenClawRestart();
if (!restartMethod.ok) {
await removeRestartCommandSentinel(sentinelPath);
const detail = restartMethod.detail ? ` Details: ${restartMethod.detail}` : "";
return {
shouldContinue: false,

View File

@@ -186,9 +186,7 @@ describe("config shared auth disconnects", () => {
expect(scheduleGatewaySigusr1RestartMock).toHaveBeenCalledTimes(1);
});
it("adds a default continuation to session-scoped restart sentinels", async () => {
const { DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE } =
await import("../../infra/restart-sentinel.js");
it("does not add an agent continuation from generic control-plane sessionKey params", async () => {
const prevConfig: OpenClawConfig = {
gateway: {
reload: {
@@ -213,11 +211,9 @@ describe("config shared auth disconnects", () => {
expect(restartSentinelMocks.writeRestartSentinel).toHaveBeenCalledWith(
expect.objectContaining({
sessionKey: "agent:main:main",
continuation: {
kind: "agentTurn",
message: DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE,
},
}),
);
const payload = restartSentinelMocks.writeRestartSentinel.mock.calls.at(-1)?.[0];
expect(payload?.continuation).toBeUndefined();
});
});

View File

@@ -22,7 +22,6 @@ import { extractDeliveryInfo } from "../../config/sessions.js";
import type { ConfigValidationIssue, OpenClawConfig } from "../../config/types.openclaw.js";
import { formatErrorMessage } from "../../infra/errors.js";
import {
buildRestartSuccessContinuation,
formatDoctorNonInteractiveHint,
type RestartSentinelPayload,
writeRestartSentinel,
@@ -368,7 +367,6 @@ function buildConfigRestartSentinelPayload(params: {
deliveryContext: params.deliveryContext,
threadId: params.threadId,
message: params.note ?? null,
continuation: buildRestartSuccessContinuation({ sessionKey: params.sessionKey }),
doctorHint: formatDoctorNonInteractiveHint(),
stats: {
mode: params.mode,

View File

@@ -1,8 +1,5 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import {
DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE,
type RestartSentinelPayload,
} from "../../infra/restart-sentinel.js";
import type { RestartSentinelPayload } from "../../infra/restart-sentinel.js";
import type { UpdateRunResult } from "../../infra/update-runner.js";
// Capture the sentinel payload written during update.run
@@ -125,10 +122,7 @@ describe("update.run sentinel deliveryContext", () => {
to: "webchat:user-123",
accountId: "default",
});
expect(capturedPayload!.continuation).toEqual({
kind: "agentTurn",
message: DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE,
});
expect(capturedPayload!.continuation).toBeUndefined();
});
it("omits deliveryContext when no sessionKey is provided", async () => {
@@ -139,7 +133,7 @@ describe("update.run sentinel deliveryContext", () => {
expect(capturedPayload).toBeDefined();
expect(capturedPayload!.deliveryContext).toBeUndefined();
expect(capturedPayload!.threadId).toBeUndefined();
expect(capturedPayload!.continuation).toBeNull();
expect(capturedPayload!.continuation).toBeUndefined();
});
it("includes threadId in sentinel payload for threaded sessions", async () => {
@@ -154,10 +148,7 @@ describe("update.run sentinel deliveryContext", () => {
accountId: "workspace-1",
});
expect(capturedPayload!.threadId).toBe("1234567890.123456");
expect(capturedPayload!.continuation).toEqual({
kind: "agentTurn",
message: DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE,
});
expect(capturedPayload!.continuation).toBeUndefined();
});
});
@@ -206,6 +197,6 @@ describe("update.run restart scheduling", () => {
expect(scheduleGatewaySigusr1RestartMock).not.toHaveBeenCalled();
expect(payload?.ok).toBe(false);
expect(payload?.restart).toBeNull();
expect(capturedPayload?.continuation).toBeNull();
expect(capturedPayload?.continuation).toBeUndefined();
});
});

View File

@@ -2,7 +2,6 @@ import { loadConfig } from "../../config/config.js";
import { extractDeliveryInfo } from "../../config/sessions.js";
import { resolveOpenClawPackageRoot } from "../../infra/openclaw-root.js";
import {
buildRestartSuccessContinuation,
formatDoctorNonInteractiveHint,
type RestartSentinelPayload,
writeRestartSentinel,
@@ -73,7 +72,6 @@ export const updateHandlers: GatewayRequestHandlers = {
deliveryContext,
threadId,
message: note ?? null,
continuation: result.status === "ok" ? buildRestartSuccessContinuation({ sessionKey }) : null,
doctorHint: formatDoctorNonInteractiveHint(),
stats: {
mode: result.mode,

View File

@@ -198,12 +198,11 @@ describe("restart success continuation", () => {
expect(
buildRestartSuccessContinuation({
sessionKey: "agent:main:main",
continuationKind: "systemEvent",
continuationMessage: "wake after restart",
}),
).toEqual({
kind: "systemEvent",
text: "wake after restart",
kind: "agentTurn",
message: "wake after restart",
});
});

View File

@@ -83,20 +83,17 @@ export async function writeRestartSentinel(
) {
const filePath = resolveRestartSentinelPath(env);
const data: RestartSentinel = { version: 1, payload };
await writeJsonAtomic(filePath, data, { trailingNewline: true });
await writeJsonAtomic(filePath, data, { trailingNewline: true, ensureDirMode: 0o700 });
return filePath;
}
export function buildRestartSuccessContinuation(params: {
sessionKey?: string;
continuationKind?: string | null;
continuationMessage?: string | null;
}): RestartSentinelContinuation | null {
const message = params.continuationMessage?.trim();
if (message) {
return params.continuationKind === "systemEvent"
? { kind: "systemEvent", text: message }
: { kind: "agentTurn", message };
return { kind: "agentTurn", message };
}
return params.sessionKey?.trim()
? { kind: "agentTurn", message: DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE }