mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
fix: kill stuck ACP child processes on startup and harden sessions in discord threads (#33699)
* Gateway: resolve agent.wait for chat.send runs * Discord: harden ACP thread binding + listener timeout * ACPX: handle already-exited child wait * Gateway/Discord: address PR review findings * Discord: keep ACP error-state thread bindings on startup * gateway: make agent.wait dedupe bridge event-driven * discord: harden ACP probe classification and cap startup fan-out * discord: add cooperative timeout cancellation * discord: fix startup probe concurrency helper typing * plugin-sdk: avoid Windows root-alias shard timeout * plugin-sdk: keep root alias reflection path non-blocking * discord+gateway: resolve remaining PR review findings * gateway+discord: fix codex review regressions * Discord/Gateway: address Codex review findings * Gateway: keep agent.wait lifecycle active with shared run IDs * Discord: clean up status reactions on aborted runs * fix: add changelog note for ACP/Discord startup hardening (#33699) (thanks @dutifulbob) --------- Co-authored-by: Onur <2453968+osolmaz@users.noreply.github.com>
This commit is contained in:
@@ -1,9 +1,15 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { createWindowsCmdShimFixture } from "../../../shared/windows-cmd-shim-test-fixtures.js";
|
||||
import { resolveSpawnCommand, type SpawnCommandCache } from "./process.js";
|
||||
import {
|
||||
resolveSpawnCommand,
|
||||
spawnAndCollect,
|
||||
type SpawnCommandCache,
|
||||
waitForExit,
|
||||
} from "./process.js";
|
||||
|
||||
const tempDirs: string[] = [];
|
||||
|
||||
@@ -225,3 +231,62 @@ describe("resolveSpawnCommand", () => {
|
||||
expect(second.args[0]).toBe(scriptPath);
|
||||
});
|
||||
});
|
||||
|
||||
describe("waitForExit", () => {
|
||||
it("resolves when the child already exited before waiting starts", async () => {
|
||||
const child = spawn(process.execPath, ["-e", "process.exit(0)"], {
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
child.once("close", () => {
|
||||
resolve();
|
||||
});
|
||||
child.once("error", reject);
|
||||
});
|
||||
|
||||
const exit = await waitForExit(child);
|
||||
expect(exit.code).toBe(0);
|
||||
expect(exit.signal).toBeNull();
|
||||
expect(exit.error).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("spawnAndCollect", () => {
|
||||
it("returns abort error immediately when signal is already aborted", async () => {
|
||||
const controller = new AbortController();
|
||||
controller.abort();
|
||||
const result = await spawnAndCollect(
|
||||
{
|
||||
command: process.execPath,
|
||||
args: ["-e", "process.exit(0)"],
|
||||
cwd: process.cwd(),
|
||||
},
|
||||
undefined,
|
||||
{ signal: controller.signal },
|
||||
);
|
||||
|
||||
expect(result.code).toBeNull();
|
||||
expect(result.error?.name).toBe("AbortError");
|
||||
});
|
||||
|
||||
it("terminates a running process when signal aborts", async () => {
|
||||
const controller = new AbortController();
|
||||
const resultPromise = spawnAndCollect(
|
||||
{
|
||||
command: process.execPath,
|
||||
args: ["-e", "setTimeout(() => process.stdout.write('done'), 10_000)"],
|
||||
cwd: process.cwd(),
|
||||
},
|
||||
undefined,
|
||||
{ signal: controller.signal },
|
||||
);
|
||||
|
||||
setTimeout(() => {
|
||||
controller.abort();
|
||||
}, 10);
|
||||
|
||||
const result = await resultPromise;
|
||||
expect(result.error?.name).toBe("AbortError");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -114,6 +114,12 @@ export function resolveSpawnCommand(
|
||||
};
|
||||
}
|
||||
|
||||
function createAbortError(): Error {
|
||||
const error = new Error("Operation aborted.");
|
||||
error.name = "AbortError";
|
||||
return error;
|
||||
}
|
||||
|
||||
export function spawnWithResolvedCommand(
|
||||
params: {
|
||||
command: string;
|
||||
@@ -140,6 +146,15 @@ export function spawnWithResolvedCommand(
|
||||
}
|
||||
|
||||
export async function waitForExit(child: ChildProcessWithoutNullStreams): Promise<SpawnExit> {
|
||||
// Handle callers that start waiting after the child has already exited.
|
||||
if (child.exitCode !== null || child.signalCode !== null) {
|
||||
return {
|
||||
code: child.exitCode,
|
||||
signal: child.signalCode,
|
||||
error: null,
|
||||
};
|
||||
}
|
||||
|
||||
return await new Promise<SpawnExit>((resolve) => {
|
||||
let settled = false;
|
||||
const finish = (result: SpawnExit) => {
|
||||
@@ -167,12 +182,23 @@ export async function spawnAndCollect(
|
||||
cwd: string;
|
||||
},
|
||||
options?: SpawnCommandOptions,
|
||||
runtime?: {
|
||||
signal?: AbortSignal;
|
||||
},
|
||||
): Promise<{
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
code: number | null;
|
||||
error: Error | null;
|
||||
}> {
|
||||
if (runtime?.signal?.aborted) {
|
||||
return {
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
error: createAbortError(),
|
||||
};
|
||||
}
|
||||
const child = spawnWithResolvedCommand(params, options);
|
||||
child.stdin.end();
|
||||
|
||||
@@ -185,13 +211,43 @@ export async function spawnAndCollect(
|
||||
stderr += String(chunk);
|
||||
});
|
||||
|
||||
const exit = await waitForExit(child);
|
||||
return {
|
||||
stdout,
|
||||
stderr,
|
||||
code: exit.code,
|
||||
error: exit.error,
|
||||
let abortKillTimer: NodeJS.Timeout | undefined;
|
||||
let aborted = false;
|
||||
const onAbort = () => {
|
||||
aborted = true;
|
||||
try {
|
||||
child.kill("SIGTERM");
|
||||
} catch {
|
||||
// Ignore kill races when child already exited.
|
||||
}
|
||||
abortKillTimer = setTimeout(() => {
|
||||
if (child.exitCode !== null || child.signalCode !== null) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
child.kill("SIGKILL");
|
||||
} catch {
|
||||
// Ignore kill races when child already exited.
|
||||
}
|
||||
}, 250);
|
||||
abortKillTimer.unref?.();
|
||||
};
|
||||
runtime?.signal?.addEventListener("abort", onAbort, { once: true });
|
||||
|
||||
try {
|
||||
const exit = await waitForExit(child);
|
||||
return {
|
||||
stdout,
|
||||
stderr,
|
||||
code: exit.code,
|
||||
error: aborted ? createAbortError() : exit.error,
|
||||
};
|
||||
} finally {
|
||||
runtime?.signal?.removeEventListener("abort", onAbort);
|
||||
if (abortKillTimer) {
|
||||
clearTimeout(abortKillTimer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function resolveSpawnFailure(
|
||||
|
||||
@@ -353,7 +353,10 @@ export class AcpxRuntime implements AcpRuntime {
|
||||
return ACPX_CAPABILITIES;
|
||||
}
|
||||
|
||||
async getStatus(input: { handle: AcpRuntimeHandle }): Promise<AcpRuntimeStatus> {
|
||||
async getStatus(input: {
|
||||
handle: AcpRuntimeHandle;
|
||||
signal?: AbortSignal;
|
||||
}): Promise<AcpRuntimeStatus> {
|
||||
const state = this.resolveHandleState(input.handle);
|
||||
const events = await this.runControlCommand({
|
||||
args: this.buildControlArgs({
|
||||
@@ -363,6 +366,7 @@ export class AcpxRuntime implements AcpRuntime {
|
||||
cwd: state.cwd,
|
||||
fallbackCode: "ACP_TURN_FAILED",
|
||||
ignoreNoSession: true,
|
||||
signal: input.signal,
|
||||
});
|
||||
const detail = events.find((event) => !toAcpxErrorEvent(event)) ?? events[0];
|
||||
if (!detail) {
|
||||
@@ -586,6 +590,7 @@ export class AcpxRuntime implements AcpRuntime {
|
||||
cwd: string;
|
||||
fallbackCode: AcpRuntimeErrorCode;
|
||||
ignoreNoSession?: boolean;
|
||||
signal?: AbortSignal;
|
||||
}): Promise<AcpxJsonObject[]> {
|
||||
const result = await spawnAndCollect(
|
||||
{
|
||||
@@ -594,6 +599,9 @@ export class AcpxRuntime implements AcpRuntime {
|
||||
cwd: params.cwd,
|
||||
},
|
||||
this.spawnCommandOptions,
|
||||
{
|
||||
signal: params.signal,
|
||||
},
|
||||
);
|
||||
|
||||
if (result.error) {
|
||||
|
||||
Reference in New Issue
Block a user