fix(gateway): trace restart intent reasons

This commit is contained in:
Peter Steinberger
2026-05-16 21:20:59 +01:00
parent c421be6c90
commit 6369bf64cd
8 changed files with 72 additions and 6 deletions

View File

@@ -55,6 +55,7 @@ Docs: https://docs.openclaw.ai
- Codex app-server: keep long-running turns alive while current-turn approvals, user input, dynamic tools, and notifications make progress, and carry that progress into the outer run timeout. (#82601) Thanks @100yenadmin.
- Gateway/channels: hand off traced channel account startup outside the startup diagnostic phase so long-lived channel tasks do not keep liveness warnings pinned to channel startup. Refs #82398.
- Gateway/restart: queue restart and shutdown signals received while the gateway startup loop is still returning its server handle, so startup-time restarts are not dropped during update churn. (#82660) Thanks @samzong.
- Gateway/restart: carry operator restart intent reasons into macOS LaunchAgent restart traces, so cascade diagnostics identify `gateway.restart` instead of a bare SIGTERM.
- GitHub Copilot: route device-login requests through the plugin SSRF guard with a GitHub-only policy.
- Group/channel replies: keep message-tool-preferred final replies private when the agent misses the message tool, and log suppressed payload metadata in the gateway debug log for quieter diagnosis.
- Gateway/WebChat: route image attachments through a configured vision-capable `imageModel` plan before inlining images, and carry that image-model fallback chain through runtime retries. (#82524) Thanks @frankekn.

View File

@@ -346,7 +346,10 @@ describe("runServiceRestart token drift", () => {
await runServiceRestart(createServiceRunArgs());
expect(writeGatewayRestartIntentSync).toHaveBeenCalledWith({ targetPid: 1234 });
expect(writeGatewayRestartIntentSync).toHaveBeenCalledWith({
targetPid: 1234,
reason: "gateway.restart",
});
expect(clearGatewayRestartIntentSync).not.toHaveBeenCalled();
expect(service.restart).toHaveBeenCalledTimes(1);
});
@@ -366,6 +369,7 @@ describe("runServiceRestart token drift", () => {
expect(writeGatewayRestartIntentSync).toHaveBeenCalledWith({
targetPid: 1234,
reason: "gateway.restart",
intent: {
waitMs: 2_500,
},
@@ -379,7 +383,10 @@ describe("runServiceRestart token drift", () => {
await expect(runServiceRestart(createServiceRunArgs())).rejects.toThrow("__exit__:1");
expect(writeGatewayRestartIntentSync).toHaveBeenCalledWith({ targetPid: 1234 });
expect(writeGatewayRestartIntentSync).toHaveBeenCalledWith({
targetPid: 1234,
reason: "gateway.restart",
});
expect(clearGatewayRestartIntentSync).toHaveBeenCalledOnce();
});

View File

@@ -574,6 +574,7 @@ export async function runServiceRestart(params: {
const runtime = await params.service.readRuntime(process.env).catch(() => null);
wroteRestartIntent = writeGatewayRestartIntentSync({
targetPid: runtime?.pid,
reason: "gateway.restart",
...(restartIntent ? { intent: restartIntent } : {}),
});
}

View File

@@ -208,6 +208,7 @@ async function restartGatewayWithoutServiceManager(
}
writeGatewayRestartIntentSync({
targetPid: pids[0],
reason: "gateway.restart",
...(restartIntent ? { intent: restartIntent } : {}),
});
signalVerifiedGatewayPidSync(pids[0], "SIGUSR1");

View File

@@ -6,7 +6,7 @@ const acquireGatewayLock = vi.fn(async (_opts?: { port?: number }) => ({
release: vi.fn(async () => {}),
}));
const consumeGatewayRestartIntentPayloadSync = vi.fn<
() => { force?: boolean; waitMs?: number } | null
() => { reason?: string; force?: boolean; waitMs?: number } | null
>(() => null);
const consumeGatewaySigusr1RestartAuthorization = vi.fn(() => true);
const consumeGatewayRestartIntentSync = vi.fn(() => false);
@@ -1086,6 +1086,40 @@ describe("runGatewayLoop", () => {
}
});
// Verifies that when SIGTERM arrives and a restart intent payload carrying a
// reason is consumed, the restart handoff is recorded with that reason and
// with the launchd supervisor mode.
it("carries SIGTERM restart intent reason into launchd supervised handoff", async () => {
vi.clearAllMocks();
// The next consumed restart intent reports the operator-supplied reason.
consumeGatewayRestartIntentPayloadSync.mockReturnValueOnce({ reason: "gateway.restart" });
try {
// Pretend to be a macOS process with a launchd job label set.
// NOTE(review): presumably this is what the loop uses to detect launchd
// supervision — confirm against the loop implementation.
setPlatform("darwin");
process.env.LAUNCH_JOB_LABEL = "ai.openclaw.gateway";
// The fresh-PID restart helper reports a supervised restart for this run.
restartGatewayProcessWithFreshPid.mockReturnValueOnce({
mode: "supervised",
});
await withIsolatedSignals(async ({ captureSignal }) => {
const { exited } = await createSignaledLoopHarness();
const sigterm = captureSignal("SIGTERM");
// Switch to fake timers only after the harness is up, then fire the
// captured SIGTERM handler and advance time so the loop can finish.
// NOTE(review): the 1500 ms figure is not explained in this view.
vi.useFakeTimers();
sigterm();
await vi.advanceTimersByTimeAsync(1500);
await expect(exited).resolves.toBe(0);
// The handoff must carry the intent's reason.
expectRestartHandoffCall({
restartKind: "full-process",
reason: "gateway.restart",
supervisorMode: "launchd",
});
});
} finally {
// Undo every piece of global state touched above: timers, env var, platform.
vi.useRealTimers();
delete process.env.LAUNCH_JOB_LABEL;
if (originalPlatformDescriptor) {
Object.defineProperty(process, "platform", originalPlatformDescriptor);
}
}
});
it("forwards lockPort to initial and restart lock acquisitions", async () => {
vi.clearAllMocks();
peekGatewaySigusr1RestartReason.mockReturnValue(undefined);

View File

@@ -25,6 +25,7 @@ const UPDATE_RESPAWN_HEALTH_POLL_MS = 200;
/** Action a received signal can request from the gateway run loop. */
type GatewayRunSignalAction = "stop" | "restart";
/** Optional drain timeout in milliseconds; `undefined` when none is given. */
type RestartDrainTimeoutMs = number | undefined;
/**
 * Options taken from a consumed restart intent and passed along with a
 * restart/stop request in the run loop.
 */
type RestartIntentOptions = {
// Free-form label for why the restart was requested (e.g. "gateway.restart").
reason?: string;
// NOTE(review): semantics are defined by the consumer, which is not visible here.
force?: boolean;
// NOTE(review): presumably a wait duration in milliseconds — confirm against the consumer.
waitMs?: number;
};
@@ -638,7 +639,12 @@ export async function runGatewayLoop(params: {
void (async () => {
const { consumeGatewayRestartIntentPayloadSync } = await loadGatewayLifecycleRuntimeModule();
const restartIntent = consumeGatewayRestartIntentPayloadSync();
request(restartIntent ? "restart" : "stop", "SIGTERM", undefined, restartIntent ?? undefined);
request(
restartIntent ? "restart" : "stop",
"SIGTERM",
restartIntent?.reason,
restartIntent ?? undefined,
);
})();
};
const onSigint = () => {
@@ -658,7 +664,7 @@ export async function runGatewayLoop(params: {
} = await loadGatewayLifecycleRuntimeModule();
const restartIntent = consumeGatewayRestartIntentPayloadSync();
if (restartIntent) {
request("restart", "SIGUSR1", "gateway.restart", restartIntent);
request("restart", "SIGUSR1", restartIntent.reason ?? "gateway.restart", restartIntent);
return;
}
const authorized = consumeGatewaySigusr1RestartAuthorization();

View File

@@ -64,18 +64,20 @@ describe("gateway restart intent", () => {
expect(fs.statSync(intentPath(env)).mode & 0o777).toBe(0o600);
});
it("round-trips restart force and wait options", () => {
it("round-trips restart reason, force, and wait options", () => {
const env = createIntentEnv();
expect(
writeGatewayRestartIntentSync({
env,
targetPid: process.pid,
reason: "gateway.restart",
intent: { force: true, waitMs: 12_345 },
}),
).toBe(true);
expect(consumeGatewayRestartIntentPayloadSync(env)).toEqual({
reason: "gateway.restart",
force: true,
waitMs: 12_345,
});

View File

@@ -91,11 +91,13 @@ type GatewayRestartIntentPayload = {
kind: "gateway-restart";
pid: number;
createdAt: number;
reason?: string;
force?: boolean;
waitMs?: number;
};
/**
 * Optional settings accepted through the `intent` option of
 * `writeGatewayRestartIntentSync`.
 */
export type GatewayRestartIntent = {
// Used as the persisted reason when the writer is given no top-level `reason`;
// the writer normalizes it before storing.
reason?: string;
// Only written to the payload when truthy.
force?: boolean;
// Written only when it is a finite number (any further conditions are outside this view).
// NOTE(review): presumably a wait duration in milliseconds — confirm against the consumer.
waitMs?: number;
};
@@ -125,6 +127,7 @@ export function writeGatewayRestartIntentSync(opts: {
env?: NodeJS.ProcessEnv;
targetPid?: number;
intent?: GatewayRestartIntent;
reason?: string;
}): boolean {
const targetPid = normalizeRestartIntentPid(opts.targetPid);
if (targetPid === null) {
@@ -133,10 +136,12 @@ export function writeGatewayRestartIntentSync(opts: {
const env = opts.env ?? process.env;
try {
const intentPath = resolveGatewayRestartIntentPath(env);
const reason = normalizeRestartIntentReason(opts.reason ?? opts.intent?.reason);
const payload: GatewayRestartIntentPayload = {
kind: "gateway-restart",
pid: targetPid,
createdAt: Date.now(),
...(reason ? { reason } : {}),
...(opts.intent?.force ? { force: true } : {}),
...(typeof opts.intent?.waitMs === "number" &&
Number.isFinite(opts.intent.waitMs) &&
@@ -170,14 +175,17 @@ function parseGatewayRestartIntent(raw: string): GatewayRestartIntentPayload | n
Number.isFinite(parsed.pid) &&
typeof parsed.createdAt === "number" &&
Number.isFinite(parsed.createdAt) &&
(parsed.reason === undefined || typeof parsed.reason === "string") &&
(parsed.force === undefined || typeof parsed.force === "boolean") &&
(parsed.waitMs === undefined ||
(typeof parsed.waitMs === "number" && Number.isFinite(parsed.waitMs) && parsed.waitMs >= 0))
) {
const reason = normalizeRestartIntentReason(parsed.reason);
return {
kind: "gateway-restart",
pid: parsed.pid,
createdAt: parsed.createdAt,
...(reason ? { reason } : {}),
...(parsed.force ? { force: true } : {}),
...(typeof parsed.waitMs === "number" ? { waitMs: Math.floor(parsed.waitMs) } : {}),
};
@@ -188,6 +196,11 @@ function parseGatewayRestartIntent(raw: string): GatewayRestartIntentPayload | n
return null;
}
/**
 * Cleans up a restart-intent reason before it is stored or returned.
 *
 * Surrounding whitespace is stripped and the result is capped at 200
 * characters (UTF-16 code units). A missing reason, or one that is empty
 * after trimming, yields `undefined`.
 *
 * @param reason - Raw reason text, if any.
 * @returns The trimmed, length-capped reason, or `undefined` when there is none.
 */
function normalizeRestartIntentReason(reason: string | undefined): string | undefined {
  const trimmed = reason?.trim() ?? "";
  if (trimmed === "") {
    return undefined;
  }
  return trimmed.slice(0, 200);
}
export function consumeGatewayRestartIntentPayloadSync(
env: NodeJS.ProcessEnv = process.env,
now = Date.now(),
@@ -217,6 +230,7 @@ export function consumeGatewayRestartIntentPayloadSync(
return null;
}
return {
...(payload.reason ? { reason: payload.reason } : {}),
...(payload.force ? { force: true } : {}),
...(typeof payload.waitMs === "number" ? { waitMs: payload.waitMs } : {}),
};