mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-02 02:54:56 +00:00
fix(agents): force SIGKILL for stuck MCP stdio children (#86739)
Guarantee MCP stdio child cleanup during Gateway shutdown by sending a synchronous SIGKILL when the child survives the existing stdin and SIGTERM waits. This prevents SIGTERM-ignoring local MCP processes from outliving the Gateway when killProcessTree's unref'd SIGKILL timer would otherwise lose the shutdown race. Fixes #86412. Verification: - GitHub CI green on relevant agent/runtime, lint/type, CodeQL/security, OpenGrep, and Real behavior proof checks. - Real behavior proof: https://github.com/openclaw/openclaw/actions/runs/26430512156/job/77802651894 - Maintainer manual review: no blocking findings. Thanks @openperf. Co-authored-by: openperf <16864032@qq.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import { OpenClawStdioClientTransport } from "./mcp-stdio-transport.js";
|
||||
|
||||
const spawnMock = vi.hoisted(() => vi.fn());
|
||||
const killProcessTreeMock = vi.hoisted(() => vi.fn());
|
||||
const signalProcessTreeMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("node:child_process", async () => ({
|
||||
...(await vi.importActual<typeof import("node:child_process")>("node:child_process")),
|
||||
@@ -14,6 +15,7 @@ vi.mock("node:child_process", async () => ({
|
||||
|
||||
vi.mock("../process/kill-tree.js", () => ({
|
||||
killProcessTree: killProcessTreeMock,
|
||||
signalProcessTree: signalProcessTreeMock,
|
||||
}));
|
||||
|
||||
class MockChildProcess extends EventEmitter {
|
||||
@@ -29,6 +31,7 @@ describe("OpenClawStdioClientTransport", () => {
|
||||
vi.useRealTimers();
|
||||
spawnMock.mockReset();
|
||||
killProcessTreeMock.mockReset();
|
||||
signalProcessTreeMock.mockReset();
|
||||
});
|
||||
|
||||
it("starts stdio MCP servers in a disposable process group on POSIX", async () => {
|
||||
@@ -88,6 +91,30 @@ describe("OpenClawStdioClientTransport", () => {
|
||||
await closing;
|
||||
});
|
||||
|
||||
it("force-SIGKILLs synchronously when killProcessTree's grace expires (#86412)", async () => {
|
||||
vi.useFakeTimers();
|
||||
const child = new MockChildProcess();
|
||||
spawnMock.mockReturnValue(child);
|
||||
|
||||
const transport = new OpenClawStdioClientTransport({ command: "npx" });
|
||||
const started = transport.start();
|
||||
child.emit("spawn");
|
||||
await started;
|
||||
|
||||
const closing = transport.close();
|
||||
await vi.advanceTimersByTimeAsync(2000);
|
||||
expect(killProcessTreeMock).toHaveBeenCalledWith(4321);
|
||||
expect(signalProcessTreeMock).not.toHaveBeenCalled();
|
||||
|
||||
// killProcessTree's SIGKILL is .unref()'d (#86412); close() force-SIGKILLs synchronously instead.
|
||||
await vi.advanceTimersByTimeAsync(2000);
|
||||
expect(signalProcessTreeMock).toHaveBeenCalledWith(4321, "SIGKILL");
|
||||
|
||||
child.exitCode = 0;
|
||||
child.emit("close", 0);
|
||||
await closing;
|
||||
});
|
||||
|
||||
it("does not kill the process tree when graceful stdio close exits", async () => {
|
||||
vi.useFakeTimers();
|
||||
const child = new MockChildProcess();
|
||||
|
||||
@@ -5,7 +5,7 @@ import { getDefaultEnvironment } from "@modelcontextprotocol/sdk/client/stdio.js
|
||||
import { ReadBuffer, serializeMessage } from "@modelcontextprotocol/sdk/shared/stdio.js";
|
||||
import type { Transport } from "@modelcontextprotocol/sdk/shared/transport.js";
|
||||
import type { JSONRPCMessage } from "@modelcontextprotocol/sdk/types.js";
|
||||
import { killProcessTree } from "../process/kill-tree.js";
|
||||
import { killProcessTree, signalProcessTree } from "../process/kill-tree.js";
|
||||
import { prepareOomScoreAdjustedSpawn } from "../process/linux-oom-score.js";
|
||||
|
||||
export type OpenClawStdioServerParameters = {
|
||||
@@ -17,6 +17,7 @@ export type OpenClawStdioServerParameters = {
|
||||
};
|
||||
|
||||
const CLOSE_TIMEOUT_MS = 2000;
|
||||
const SIGKILL_REAP_TIMEOUT_MS = 500;
|
||||
|
||||
function delay(ms: number) {
|
||||
return new Promise<void>((resolve) => {
|
||||
@@ -125,6 +126,11 @@ export class OpenClawStdioClientTransport implements Transport {
|
||||
if (processToClose.exitCode === null && processToClose.pid) {
|
||||
killProcessTree(processToClose.pid);
|
||||
await Promise.race([closePromise, delay(CLOSE_TIMEOUT_MS)]);
|
||||
if (processToClose.exitCode === null && processToClose.pid) {
|
||||
// SIGKILL synchronously: killProcessTree's setTimeout is .unref()'d and races shutdown (#86412).
|
||||
signalProcessTree(processToClose.pid, "SIGKILL");
|
||||
await Promise.race([closePromise, delay(SIGKILL_REAP_TIMEOUT_MS)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
this.readBuffer.clear();
|
||||
|
||||
Reference in New Issue
Block a user