fix: kill stuck ACP child processes on startup and harden sessions in discord threads (#33699)

* Gateway: resolve agent.wait for chat.send runs

* Discord: harden ACP thread binding + listener timeout

* ACPX: handle already-exited child wait

* Gateway/Discord: address PR review findings

* Discord: keep ACP error-state thread bindings on startup

* gateway: make agent.wait dedupe bridge event-driven

* discord: harden ACP probe classification and cap startup fan-out

* discord: add cooperative timeout cancellation

* discord: fix startup probe concurrency helper typing

* plugin-sdk: avoid Windows root-alias shard timeout

* plugin-sdk: keep root alias reflection path non-blocking

* discord+gateway: resolve remaining PR review findings

* gateway+discord: fix codex review regressions

* Discord/Gateway: address Codex review findings

* Gateway: keep agent.wait lifecycle active with shared run IDs

* Discord: clean up status reactions on aborted runs

* fix: add changelog note for ACP/Discord startup hardening (#33699) (thanks @dutifulbob)

---------

Co-authored-by: Onur <2453968+osolmaz@users.noreply.github.com>
This commit is contained in:
Bob
2026-03-04 10:52:28 +01:00
committed by GitHub
parent bd25182d5a
commit 61f7cea48b
30 changed files with 2568 additions and 180 deletions

View File

@@ -1,9 +1,15 @@
import { spawn } from "node:child_process";
import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { createWindowsCmdShimFixture } from "../../../shared/windows-cmd-shim-test-fixtures.js";
import { resolveSpawnCommand, type SpawnCommandCache } from "./process.js";
import {
resolveSpawnCommand,
spawnAndCollect,
type SpawnCommandCache,
waitForExit,
} from "./process.js";
const tempDirs: string[] = [];
@@ -225,3 +231,62 @@ describe("resolveSpawnCommand", () => {
expect(second.args[0]).toBe(scriptPath);
});
});
describe("waitForExit", () => {
it("resolves when the child already exited before waiting starts", async () => {
const child = spawn(process.execPath, ["-e", "process.exit(0)"], {
stdio: ["pipe", "pipe", "pipe"],
});
await new Promise<void>((resolve, reject) => {
child.once("close", () => {
resolve();
});
child.once("error", reject);
});
const exit = await waitForExit(child);
expect(exit.code).toBe(0);
expect(exit.signal).toBeNull();
expect(exit.error).toBeNull();
});
});
describe("spawnAndCollect", () => {
it("returns abort error immediately when signal is already aborted", async () => {
const controller = new AbortController();
controller.abort();
const result = await spawnAndCollect(
{
command: process.execPath,
args: ["-e", "process.exit(0)"],
cwd: process.cwd(),
},
undefined,
{ signal: controller.signal },
);
expect(result.code).toBeNull();
expect(result.error?.name).toBe("AbortError");
});
it("terminates a running process when signal aborts", async () => {
const controller = new AbortController();
const resultPromise = spawnAndCollect(
{
command: process.execPath,
args: ["-e", "setTimeout(() => process.stdout.write('done'), 10_000)"],
cwd: process.cwd(),
},
undefined,
{ signal: controller.signal },
);
setTimeout(() => {
controller.abort();
}, 10);
const result = await resultPromise;
expect(result.error?.name).toBe("AbortError");
});
});

View File

@@ -114,6 +114,12 @@ export function resolveSpawnCommand(
};
}
function createAbortError(): Error {
const error = new Error("Operation aborted.");
error.name = "AbortError";
return error;
}
export function spawnWithResolvedCommand(
params: {
command: string;
@@ -140,6 +146,15 @@ export function spawnWithResolvedCommand(
}
export async function waitForExit(child: ChildProcessWithoutNullStreams): Promise<SpawnExit> {
// Handle callers that start waiting after the child has already exited.
if (child.exitCode !== null || child.signalCode !== null) {
return {
code: child.exitCode,
signal: child.signalCode,
error: null,
};
}
return await new Promise<SpawnExit>((resolve) => {
let settled = false;
const finish = (result: SpawnExit) => {
@@ -167,12 +182,23 @@ export async function spawnAndCollect(
cwd: string;
},
options?: SpawnCommandOptions,
runtime?: {
signal?: AbortSignal;
},
): Promise<{
stdout: string;
stderr: string;
code: number | null;
error: Error | null;
}> {
if (runtime?.signal?.aborted) {
return {
stdout: "",
stderr: "",
code: null,
error: createAbortError(),
};
}
const child = spawnWithResolvedCommand(params, options);
child.stdin.end();
@@ -185,13 +211,43 @@ export async function spawnAndCollect(
stderr += String(chunk);
});
const exit = await waitForExit(child);
return {
stdout,
stderr,
code: exit.code,
error: exit.error,
let abortKillTimer: NodeJS.Timeout | undefined;
let aborted = false;
const onAbort = () => {
aborted = true;
try {
child.kill("SIGTERM");
} catch {
// Ignore kill races when child already exited.
}
abortKillTimer = setTimeout(() => {
if (child.exitCode !== null || child.signalCode !== null) {
return;
}
try {
child.kill("SIGKILL");
} catch {
// Ignore kill races when child already exited.
}
}, 250);
abortKillTimer.unref?.();
};
runtime?.signal?.addEventListener("abort", onAbort, { once: true });
try {
const exit = await waitForExit(child);
return {
stdout,
stderr,
code: exit.code,
error: aborted ? createAbortError() : exit.error,
};
} finally {
runtime?.signal?.removeEventListener("abort", onAbort);
if (abortKillTimer) {
clearTimeout(abortKillTimer);
}
}
}
export function resolveSpawnFailure(

View File

@@ -353,7 +353,10 @@ export class AcpxRuntime implements AcpRuntime {
return ACPX_CAPABILITIES;
}
async getStatus(input: { handle: AcpRuntimeHandle }): Promise<AcpRuntimeStatus> {
async getStatus(input: {
handle: AcpRuntimeHandle;
signal?: AbortSignal;
}): Promise<AcpRuntimeStatus> {
const state = this.resolveHandleState(input.handle);
const events = await this.runControlCommand({
args: this.buildControlArgs({
@@ -363,6 +366,7 @@ export class AcpxRuntime implements AcpRuntime {
cwd: state.cwd,
fallbackCode: "ACP_TURN_FAILED",
ignoreNoSession: true,
signal: input.signal,
});
const detail = events.find((event) => !toAcpxErrorEvent(event)) ?? events[0];
if (!detail) {
@@ -586,6 +590,7 @@ export class AcpxRuntime implements AcpRuntime {
cwd: string;
fallbackCode: AcpRuntimeErrorCode;
ignoreNoSession?: boolean;
signal?: AbortSignal;
}): Promise<AcpxJsonObject[]> {
const result = await spawnAndCollect(
{
@@ -594,6 +599,9 @@ export class AcpxRuntime implements AcpRuntime {
cwd: params.cwd,
},
this.spawnCommandOptions,
{
signal: params.signal,
},
);
if (result.error) {