From f5173589a45cb2975b7ad5f44247e4e48b3b88e0 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Wed, 22 Apr 2026 22:47:04 +0530 Subject: [PATCH] fix(gateway): harden restart acknowledgements --- docs/gateway/configuration.md | 4 -- src/agents/tools/gateway-tool.test.ts | 49 ++++++++++++------- src/agents/tools/gateway-tool.ts | 7 +-- .../reply/commands-session-restart.test.ts | 47 ++++++++++++++++++ src/auto-reply/reply/commands-session.ts | 33 +++++++++++-- .../server-methods/config.shared-auth.test.ts | 10 ++-- src/gateway/server-methods/config.ts | 2 - src/gateway/server-methods/update.test.ts | 19 ++----- src/gateway/server-methods/update.ts | 2 - src/infra/restart-sentinel.test.ts | 5 +- src/infra/restart-sentinel.ts | 7 +-- 11 files changed, 119 insertions(+), 66 deletions(-) diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 21fe79a37e2..c28c791b624 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -606,10 +606,6 @@ then `config.patch`. - `note` (optional) — note for the restart sentinel - `restartDelayMs` (optional) — delay before restart (default 2000) - When `sessionKey` is present and no explicit continuation is provided, the - Gateway asks that session's agent to acknowledge the successful restart - after boot. - Restart requests are coalesced while one is already pending/in-flight, and a 30-second cooldown applies between restart cycles. ```bash diff --git a/src/agents/tools/gateway-tool.test.ts b/src/agents/tools/gateway-tool.test.ts index fda259d2f34..9d2ac2a4aae 100644 --- a/src/agents/tools/gateway-tool.test.ts +++ b/src/agents/tools/gateway-tool.test.ts @@ -69,28 +69,16 @@ describe("gateway tool restart continuation", () => { scheduleGatewaySigusr1RestartMock.mockReturnValue({ scheduled: true, delayMs: 250 }); }); - it("uses a flat enum for continuationKind in the tool schema", async () => { + it("does not expose system-event continuations to the agent tool", async () => { const { createGatewayTool } = await import("./gateway-tool.js"); const tool = createGatewayTool(); - const continuationKind = ( - tool.parameters as { - properties?: { - continuationKind?: { - type?: string; - enum?: string[]; - anyOf?: unknown[]; - }; - }; - } - ).properties?.continuationKind; - expect(continuationKind).toEqual( - expect.objectContaining({ - type: "string", - enum: ["systemEvent", "agentTurn"], - }), - ); - expect(continuationKind).not.toHaveProperty("anyOf"); + const parameters = tool.parameters as { + properties?: { + continuationKind?: unknown; + }; + }; + expect(parameters.properties?.continuationKind).toBeUndefined(); }); it("instructs agents to use continuationMessage when a restart still needs a reply", async () => { @@ -142,6 +130,29 @@ describe("gateway tool restart continuation", () => { expect(result?.details).toEqual({ scheduled: true, delayMs: 250 }); }); + it("coerces legacy continuationKind inputs to an agentTurn", async () => { + const { createGatewayTool } = await import("./gateway-tool.js"); + const tool = createGatewayTool({ + agentSessionKey: "agent:main:main", + config: {}, + }); + + await tool.execute?.("tool-call-1", { + action: "restart", + continuationKind: "systemEvent", + continuationMessage: "Reply after restart", + }); + + expect(writeRestartSentinelMock).toHaveBeenCalledWith( + expect.objectContaining({ + continuation: { + kind: "agentTurn", + message: "Reply after restart", + }, + }), + ); + }); + it("defaults session-scoped restarts to a success continuation", async () => { const { createGatewayTool } = await import("./gateway-tool.js"); const { DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE } = diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index 98f8e72ca5a..f8a65cf75a4 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -15,7 +15,7 @@ import { scheduleGatewaySigusr1Restart } from "../../infra/restart.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; import { collectEnabledInsecureOrDangerousFlags } from "../../security/dangerous-config-flags.js"; import { normalizeOptionalString, readStringValue } from "../../shared/string-coerce.js"; -import { optionalStringEnum, stringEnum } from "../schema/typebox.js"; +import { stringEnum } from "../schema/typebox.js"; import { type AnyAgentTool, jsonResult, readStringParam } from "./common.js"; import { callGatewayTool, readGatewayCallOptions } from "./gateway.js"; import { isOpenClawOwnerOnlyCoreToolName } from "./owner-only-tools.js"; @@ -290,7 +290,6 @@ const GatewayToolSchema = Type.Object({ // restart delayMs: Type.Optional(Type.Number()), reason: Type.Optional(Type.String()), - continuationKind: optionalStringEnum(["systemEvent", "agentTurn"] as const), continuationMessage: Type.Optional(Type.String()), // config.get, config.schema.lookup, config.apply, update.run gatewayUrl: Type.Optional(Type.String()), @@ -320,7 +319,7 @@ export function createGatewayTool(opts?: { name: "gateway", ownerOnly: isOpenClawOwnerOnlyCoreToolName("gateway"), description: - "Restart, inspect a specific config schema path, apply config, or update the gateway in-place (SIGUSR1). Use config.schema.lookup with a targeted dot path before config edits. Use config.patch for safe partial config updates (merges with existing). Use config.apply only when replacing entire config. Config writes hot-reload when possible and restart when required. Always pass a human-readable completion message via the `note` parameter so the system can deliver it to the user after restart. If restarting during a user task and you still owe the user a reply, pass a specific one-shot `continuationMessage` for what to verify or report after boot; do not write restart sentinel files directly. Use `continuationKind` only when it should be a system event instead of a normal agent turn.", + "Restart, inspect a specific config schema path, apply config, or update the gateway in-place (SIGUSR1). Use config.schema.lookup with a targeted dot path before config edits. Use config.patch for safe partial config updates (merges with existing). Use config.apply only when replacing entire config. Config writes hot-reload when possible and restart when required. Always pass a human-readable completion message via the `note` parameter so the system can deliver it to the user after restart. If restarting during a user task and you still owe the user a reply, pass a specific one-shot `continuationMessage` for what to verify or report after boot; do not write restart sentinel files directly.", parameters: GatewayToolSchema, execute: async (_toolCallId, args) => { const params = args as Record; @@ -339,7 +338,6 @@ export function createGatewayTool(opts?: { const reason = normalizeOptionalString(params.reason)?.slice(0, 200); const note = normalizeOptionalString(params.note); const continuationMessage = normalizeOptionalString(params.continuationMessage); - const continuationKind = normalizeOptionalString(params.continuationKind); // Extract channel + threadId for routing after restart. // Uses generic :thread: parsing plus plugin-owned session grammars. const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey); @@ -353,7 +351,6 @@ export function createGatewayTool(opts?: { message: note ?? reason ?? null, continuation: buildRestartSuccessContinuation({ sessionKey, - continuationKind, continuationMessage, }), doctorHint: formatDoctorNonInteractiveHint(), diff --git a/src/auto-reply/reply/commands-session-restart.test.ts b/src/auto-reply/reply/commands-session-restart.test.ts index c6099d31ad6..f8f0dc270c5 100644 --- a/src/auto-reply/reply/commands-session-restart.test.ts +++ b/src/auto-reply/reply/commands-session-restart.test.ts @@ -3,6 +3,7 @@ import type { RestartSentinelPayload } from "../../infra/restart-sentinel.js"; import type { HandleCommandsParams } from "./commands-types.js"; const mocks = vi.hoisted(() => ({ + unlink: vi.fn(async (_path: string) => undefined), isRestartEnabled: vi.fn(() => true), extractDeliveryInfo: vi.fn(() => ({ deliveryContext: { @@ -18,6 +19,13 @@ const mocks = vi.hoisted(() => ({ triggerOpenClawRestart: vi.fn(() => ({ ok: true, method: "launchctl" })), })); +vi.mock("node:fs/promises", () => ({ + default: { + unlink: mocks.unlink, + }, + unlink: mocks.unlink, +})); + vi.mock("../../config/commands.flags.js", () => ({ isRestartEnabled: mocks.isRestartEnabled, })); @@ -98,6 +106,7 @@ describe("handleRestartCommand", () => { beforeEach(() => { mocks.isRestartEnabled.mockReset(); mocks.isRestartEnabled.mockReturnValue(true); + mocks.unlink.mockClear(); mocks.extractDeliveryInfo.mockClear(); mocks.formatDoctorNonInteractiveHint.mockClear(); mocks.writeRestartSentinel.mockClear(); @@ -133,4 +142,42 @@ describe("handleRestartCommand", () => { ); expect(mocks.triggerOpenClawRestart).toHaveBeenCalledTimes(1); }); + + it("rejects authorized non-owner restart commands", async () => { + const result = await handleRestartCommand( + restartCommandParams({ + command: { + ...restartCommandParams().command, + senderIsOwner: false, + isAuthorizedSender: true, + }, + }), + true, + ); + + expect(result).toEqual({ shouldContinue: false }); + expect(mocks.writeRestartSentinel).not.toHaveBeenCalled(); + expect(mocks.triggerOpenClawRestart).not.toHaveBeenCalled(); + }); + + it("does not restart when the sentinel cannot be written", async () => { + mocks.writeRestartSentinel.mockRejectedValueOnce(new Error("disk full")); + + const result = await handleRestartCommand(restartCommandParams(), true); + + expect(result?.reply?.text).toContain("could not persist"); + expect(mocks.triggerOpenClawRestart).not.toHaveBeenCalled(); + }); + + it("removes the success sentinel when fallback restart fails", async () => { + mocks.triggerOpenClawRestart.mockReturnValueOnce({ + ok: false, + method: "launchctl", + }); + + const result = await handleRestartCommand(restartCommandParams(), true); + + expect(result?.reply?.text).toContain("Restart failed"); + expect(mocks.unlink).toHaveBeenCalledWith("/tmp/sentinel.json"); + }); }); diff --git a/src/auto-reply/reply/commands-session.ts b/src/auto-reply/reply/commands-session.ts index 4ba7f2332c5..ba1f87ad918 100644 --- a/src/auto-reply/reply/commands-session.ts +++ b/src/auto-reply/reply/commands-session.ts @@ -1,3 +1,4 @@ +import fs from "node:fs/promises"; import { resolveSessionAgentId } from "../../agents/agent-scope.js"; import { resolveFastModeState } from "../../agents/fast-mode.js"; import { @@ -41,10 +42,10 @@ const SESSION_DURATION_OFF_VALUES = new Set(["off", "disable", "disabled", "none const SESSION_ACTION_IDLE = "idle"; const SESSION_ACTION_MAX_AGE = "max-age"; -async function writeRestartCommandSentinel(params: HandleCommandsParams) { +async function writeRestartCommandSentinel(params: HandleCommandsParams): Promise { const sessionKey = normalizeOptionalString(params.sessionKey); if (!sessionKey) { - return; + return null; } const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey); const payload: RestartSentinelPayload = { @@ -62,7 +63,14 @@ async function writeRestartCommandSentinel(params: HandleCommandsParams) { reason: "/restart", }, }; - await writeRestartSentinel(payload).catch(() => {}); + return await writeRestartSentinel(payload); +} + +async function removeRestartCommandSentinel(filePath: string | null) { + if (!filePath) { + return; + } + await fs.unlink(filePath).catch(() => {}); } function resolveSessionCommandUsage() { @@ -662,6 +670,10 @@ export const handleRestartCommand: CommandHandler = async (params, allowTextComm ); return { shouldContinue: false }; } + const nonOwner = rejectNonOwnerCommand(params, "/restart"); + if (nonOwner) { + return nonOwner; + } if (!isRestartEnabled(params.cfg)) { return { shouldContinue: false, @@ -671,8 +683,19 @@ export const handleRestartCommand: CommandHandler = async (params, allowTextComm }; } const hasSigusr1Listener = process.listenerCount("SIGUSR1") > 0; + let sentinelPath: string | null = null; + try { + sentinelPath = await writeRestartCommandSentinel(params); + } catch (err) { + logVerbose(`failed to write /restart sentinel: ${String(err)}`); + return { + shouldContinue: false, + reply: { + text: "⚠️ Restart failed: could not persist the post-restart acknowledgement.", + }, + }; + } if (hasSigusr1Listener) { - await writeRestartCommandSentinel(params); scheduleGatewaySigusr1Restart({ reason: "/restart" }); return { shouldContinue: false, @@ -681,9 +704,9 @@ export const handleRestartCommand: CommandHandler = async (params, allowTextComm }, }; } - await writeRestartCommandSentinel(params); const restartMethod = triggerOpenClawRestart(); if (!restartMethod.ok) { + await removeRestartCommandSentinel(sentinelPath); const detail = restartMethod.detail ? ` Details: ${restartMethod.detail}` : ""; return { shouldContinue: false, diff --git a/src/gateway/server-methods/config.shared-auth.test.ts b/src/gateway/server-methods/config.shared-auth.test.ts index b97baaf964c..10fda1f8f65 100644 --- a/src/gateway/server-methods/config.shared-auth.test.ts +++ b/src/gateway/server-methods/config.shared-auth.test.ts @@ -186,9 +186,7 @@ describe("config shared auth disconnects", () => { expect(scheduleGatewaySigusr1RestartMock).toHaveBeenCalledTimes(1); }); - it("adds a default continuation to session-scoped restart sentinels", async () => { - const { DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE } = - await import("../../infra/restart-sentinel.js"); + it("does not add an agent continuation from generic control-plane sessionKey params", async () => { const prevConfig: OpenClawConfig = { gateway: { reload: { @@ -213,11 +211,9 @@ describe("config shared auth disconnects", () => { expect(restartSentinelMocks.writeRestartSentinel).toHaveBeenCalledWith( expect.objectContaining({ sessionKey: "agent:main:main", - continuation: { - kind: "agentTurn", - message: DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE, - }, }), ); + const payload = restartSentinelMocks.writeRestartSentinel.mock.calls.at(-1)?.[0]; + expect(payload?.continuation).toBeUndefined(); }); }); diff --git a/src/gateway/server-methods/config.ts b/src/gateway/server-methods/config.ts index 57f85f5fc2c..009b9c8d380 100644 --- a/src/gateway/server-methods/config.ts +++ b/src/gateway/server-methods/config.ts @@ -22,7 +22,6 @@ import { extractDeliveryInfo } from "../../config/sessions.js"; import type { ConfigValidationIssue, OpenClawConfig } from "../../config/types.openclaw.js"; import { formatErrorMessage } from "../../infra/errors.js"; import { - buildRestartSuccessContinuation, formatDoctorNonInteractiveHint, type RestartSentinelPayload, writeRestartSentinel, @@ -368,7 +367,6 @@ function buildConfigRestartSentinelPayload(params: { deliveryContext: params.deliveryContext, threadId: params.threadId, message: params.note ?? null, - continuation: buildRestartSuccessContinuation({ sessionKey: params.sessionKey }), doctorHint: formatDoctorNonInteractiveHint(), stats: { mode: params.mode, diff --git a/src/gateway/server-methods/update.test.ts b/src/gateway/server-methods/update.test.ts index d0cc726c52f..00a2c4bad5c 100644 --- a/src/gateway/server-methods/update.test.ts +++ b/src/gateway/server-methods/update.test.ts @@ -1,8 +1,5 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; -import { - DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE, - type RestartSentinelPayload, -} from "../../infra/restart-sentinel.js"; +import type { RestartSentinelPayload } from "../../infra/restart-sentinel.js"; import type { UpdateRunResult } from "../../infra/update-runner.js"; // Capture the sentinel payload written during update.run @@ -125,10 +122,7 @@ describe("update.run sentinel deliveryContext", () => { to: "webchat:user-123", accountId: "default", }); - expect(capturedPayload!.continuation).toEqual({ - kind: "agentTurn", - message: DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE, - }); + expect(capturedPayload!.continuation).toBeUndefined(); }); it("omits deliveryContext when no sessionKey is provided", async () => { @@ -139,7 +133,7 @@ describe("update.run sentinel deliveryContext", () => { expect(capturedPayload).toBeDefined(); expect(capturedPayload!.deliveryContext).toBeUndefined(); expect(capturedPayload!.threadId).toBeUndefined(); - expect(capturedPayload!.continuation).toBeNull(); + expect(capturedPayload!.continuation).toBeUndefined(); }); it("includes threadId in sentinel payload for threaded sessions", async () => { @@ -154,10 +148,7 @@ describe("update.run sentinel deliveryContext", () => { accountId: "workspace-1", }); expect(capturedPayload!.threadId).toBe("1234567890.123456"); - expect(capturedPayload!.continuation).toEqual({ - kind: "agentTurn", - message: DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE, - }); + expect(capturedPayload!.continuation).toBeUndefined(); }); }); @@ -206,6 +197,6 @@ describe("update.run restart scheduling", () => { expect(scheduleGatewaySigusr1RestartMock).not.toHaveBeenCalled(); expect(payload?.ok).toBe(false); expect(payload?.restart).toBeNull(); - expect(capturedPayload?.continuation).toBeNull(); + expect(capturedPayload?.continuation).toBeUndefined(); }); }); diff --git a/src/gateway/server-methods/update.ts b/src/gateway/server-methods/update.ts index ca9bc97a9d4..47f4f0fe8d9 100644 --- a/src/gateway/server-methods/update.ts +++ b/src/gateway/server-methods/update.ts @@ -2,7 +2,6 @@ import { loadConfig } from "../../config/config.js"; import { extractDeliveryInfo } from "../../config/sessions.js"; import { resolveOpenClawPackageRoot } from "../../infra/openclaw-root.js"; import { - buildRestartSuccessContinuation, formatDoctorNonInteractiveHint, type RestartSentinelPayload, writeRestartSentinel, @@ -73,7 +72,6 @@ export const updateHandlers: GatewayRequestHandlers = { deliveryContext, threadId, message: note ?? null, - continuation: result.status === "ok" ? buildRestartSuccessContinuation({ sessionKey }) : null, doctorHint: formatDoctorNonInteractiveHint(), stats: { mode: result.mode, diff --git a/src/infra/restart-sentinel.test.ts b/src/infra/restart-sentinel.test.ts index a8ba1136510..ff06d701290 100644 --- a/src/infra/restart-sentinel.test.ts +++ b/src/infra/restart-sentinel.test.ts @@ -198,12 +198,11 @@ describe("restart success continuation", () => { expect( buildRestartSuccessContinuation({ sessionKey: "agent:main:main", - continuationKind: "systemEvent", continuationMessage: "wake after restart", }), ).toEqual({ - kind: "systemEvent", - text: "wake after restart", + kind: "agentTurn", + message: "wake after restart", }); }); diff --git a/src/infra/restart-sentinel.ts b/src/infra/restart-sentinel.ts index 63eeb2cee2e..5ad42adaa7a 100644 --- a/src/infra/restart-sentinel.ts +++ b/src/infra/restart-sentinel.ts @@ -83,20 +83,17 @@ export async function writeRestartSentinel( ) { const filePath = resolveRestartSentinelPath(env); const data: RestartSentinel = { version: 1, payload }; - await writeJsonAtomic(filePath, data, { trailingNewline: true }); + await writeJsonAtomic(filePath, data, { trailingNewline: true, ensureDirMode: 0o700 }); return filePath; } export function buildRestartSuccessContinuation(params: { sessionKey?: string; - continuationKind?: string | null; continuationMessage?: string | null; }): RestartSentinelContinuation | null { const message = params.continuationMessage?.trim(); if (message) { - return params.continuationKind === "systemEvent" - ? { kind: "systemEvent", text: message } - : { kind: "agentTurn", message }; + return { kind: "agentTurn", message }; } return params.sessionKey?.trim() ? { kind: "agentTurn", message: DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE }