mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-03 04:36:21 +00:00
fix(agents): detect signaled local service exits
This commit is contained in:
@@ -8,6 +8,7 @@ import {
|
||||
attachModelProviderLocalService,
|
||||
ensureModelProviderLocalService,
|
||||
getModelProviderLocalService,
|
||||
hasLocalServiceProcessExited,
|
||||
stopManagedProviderLocalServicesForTest,
|
||||
} from "./provider-local-service.js";
|
||||
|
||||
@@ -65,6 +66,12 @@ describe("provider local service", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Pins the three states the exit helper must distinguish:
//   - exitCode null + signalCode set  -> exited (terminated by a signal)
//   - exitCode set  + signalCode null -> exited (normal exit)
//   - both null                       -> not exited
// The first case is the one an `exitCode`-only check would miss.
it("treats signaled local service children as exited", () => {
  expect(hasLocalServiceProcessExited({ exitCode: null, signalCode: "SIGTERM" })).toBe(true);
  expect(hasLocalServiceProcessExited({ exitCode: 0, signalCode: null })).toBe(true);
  expect(hasLocalServiceProcessExited({ exitCode: null, signalCode: null })).toBe(false);
});
|
||||
|
||||
it("starts an on-demand local service and stops it after idle", async () => {
|
||||
const port = await freePort();
|
||||
const healthUrl = `http://127.0.0.1:${port}/v1/models`;
|
||||
@@ -323,6 +330,29 @@ describe("provider local service", () => {
|
||||
expect(Date.now() - startedAt).toBeLessThan(5_000);
|
||||
});
|
||||
|
||||
// Regression test: a local service that dies from a signal during startup must
// surface an error promptly instead of being polled until the readiness timeout.
it("reports a local service startup signal exit without waiting for readiness timeout", async () => {
  const port = await freePort();
  const model = attachModelProviderLocalService(
    {
      id: "demo",
      provider: "local-signal-exit",
      api: "openai-completions",
      baseUrl: `http://127.0.0.1:${port}/v1`,
    } as unknown as Model<"openai-completions">,
    {
      command: process.execPath,
      // The child sends SIGTERM to itself, so it ends via a signal rather than
      // through a normal exit code.
      args: ["-e", "process.kill(process.pid, 'SIGTERM')"],
      // Deliberately far larger than the 5 s bound asserted below: the test can
      // only pass if the exit is detected rather than waited out.
      readyTimeoutMs: 60_000,
    },
  );

  const startedAt = Date.now();
  await expect(ensureModelProviderLocalService(model)).rejects.toThrow(
    "local-signal-exit local service exited before readiness with signal SIGTERM",
  );
  expect(Date.now() - startedAt).toBeLessThan(5_000);
});
|
||||
|
||||
it("honors request aborts while waiting for local service readiness", async () => {
|
||||
const port = await freePort();
|
||||
const healthUrl = `http://127.0.0.1:${port}/v1/models`;
|
||||
|
||||
@@ -87,7 +87,8 @@ export async function ensureModelProviderLocalService(
|
||||
|
||||
try {
|
||||
if (
|
||||
managed.process?.exitCode === null &&
|
||||
managed.process &&
|
||||
!hasLocalServiceProcessExited(managed.process) &&
|
||||
(await probeHealth(healthUrl, healthHeaders, signal))
|
||||
) {
|
||||
return { release };
|
||||
@@ -110,7 +111,7 @@ export async function ensureModelProviderLocalService(
|
||||
});
|
||||
}
|
||||
await waitForAbort(managed.starting, signal);
|
||||
if (!managed.process || managed.process.exitCode !== null) {
|
||||
if (!managed.process || hasLocalServiceProcessExited(managed.process)) {
|
||||
release();
|
||||
return undefined;
|
||||
}
|
||||
@@ -232,7 +233,7 @@ async function startAndWaitForLocalService(params: {
|
||||
if (await probeHealth(healthUrl, healthHeaders, signal)) {
|
||||
return;
|
||||
}
|
||||
if (managed.process?.exitCode === null) {
|
||||
if (managed.process && !hasLocalServiceProcessExited(managed.process)) {
|
||||
log.info(`restarting unhealthy ${provider} local service`);
|
||||
await stopManagedProcessForRestart(managed, signal);
|
||||
}
|
||||
@@ -321,7 +322,7 @@ function stopManagedService(key: string, managed: ManagedLocalService, reason: s
|
||||
managed.process = undefined;
|
||||
managed.lastExit = undefined;
|
||||
services.delete(key);
|
||||
if (child && child.exitCode === null) {
|
||||
if (child && !hasLocalServiceProcessExited(child)) {
|
||||
log.info(`stopping local model service: reason=${reason}`);
|
||||
child.kill("SIGTERM");
|
||||
}
|
||||
@@ -334,12 +335,12 @@ async function stopManagedProcessForRestart(
|
||||
const child = managed.process;
|
||||
managed.process = undefined;
|
||||
managed.lastExit = undefined;
|
||||
if (!child || child.exitCode !== null) {
|
||||
if (!child || hasLocalServiceProcessExited(child)) {
|
||||
return;
|
||||
}
|
||||
child.kill("SIGTERM");
|
||||
await waitForChildExit(child, signal, DEFAULT_PROBE_TIMEOUT_MS);
|
||||
if (child.exitCode === null) {
|
||||
if (!hasLocalServiceProcessExited(child)) {
|
||||
child.kill("SIGKILL");
|
||||
await waitForChildExit(child, signal, DEFAULT_PROBE_TIMEOUT_MS);
|
||||
}
|
||||
@@ -464,7 +465,7 @@ function waitForChildExit(
|
||||
signal: AbortSignal,
|
||||
timeoutMs: number,
|
||||
): Promise<void> {
|
||||
if (child.exitCode !== null) {
|
||||
if (hasLocalServiceProcessExited(child)) {
|
||||
return Promise.resolve();
|
||||
}
|
||||
throwIfAborted(signal);
|
||||
@@ -490,3 +491,9 @@ function waitForChildExit(
|
||||
signal.addEventListener("abort", onAbort, { once: true });
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Reports whether a local service child process has already terminated.
 *
 * A process counts as exited when either field is non-null: `exitCode` is set
 * on a normal exit, while `signalCode` is set when the child was ended by a
 * signal (in which case `exitCode` can still be `null`). Checking `exitCode`
 * alone would therefore treat a signaled child as still running.
 *
 * @param child - Any object exposing the `exitCode` and `signalCode` fields of
 *   a `ChildProcess`; a full `ChildProcess` is not required.
 * @returns `true` if the child exited normally or via a signal, `false` if both
 *   fields are still `null`.
 */
export function hasLocalServiceProcessExited(
  child: Pick<ChildProcess, "exitCode" | "signalCode">,
): boolean {
  return child.exitCode !== null || child.signalCode !== null;
}
|
||||
|
||||
Reference in New Issue
Block a user