mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-14 19:40:40 +00:00
fix(gateway): drain active turns before restart to prevent message loss (#13931)
* fix(gateway): drain active turns before restart to prevent message loss On SIGUSR1 restart, the gateway now waits up to 30s for in-flight agent turns to complete before tearing down the server. This prevents buffered messages from being dropped when config.patch or update triggers a restart while agents are mid-turn. Changes: - command-queue.ts: add getActiveTaskCount() and waitForActiveTasks() helpers to track and wait on active lane tasks - run-loop.ts: on restart signal, drain active tasks before server.close() with a 30s timeout; extend force-exit timer accordingly - command-queue.test.ts: update imports for new exports Fixes #13883 * fix(queue): snapshot active tasks for restart drain --------- Co-authored-by: Elonito <0xRaini@users.noreply.github.com> Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -16,7 +16,14 @@ vi.mock("../logging/diagnostic.js", () => ({
|
||||
diagnosticLogger: diagnosticMocks.diag,
|
||||
}));
|
||||
|
||||
import { enqueueCommand, getQueueSize } from "./command-queue.js";
|
||||
import {
|
||||
enqueueCommand,
|
||||
enqueueCommandInLane,
|
||||
getActiveTaskCount,
|
||||
getQueueSize,
|
||||
setCommandLaneConcurrency,
|
||||
waitForActiveTasks,
|
||||
} from "./command-queue.js";
|
||||
|
||||
describe("command queue", () => {
|
||||
beforeEach(() => {
|
||||
@@ -85,4 +92,106 @@ describe("command queue", () => {
|
||||
expect(waited as number).toBeGreaterThanOrEqual(5);
|
||||
expect(queuedAhead).toBe(0);
|
||||
});
|
||||
|
||||
it("getActiveTaskCount returns count of currently executing tasks", async () => {
|
||||
let resolve1!: () => void;
|
||||
const blocker = new Promise<void>((r) => {
|
||||
resolve1 = r;
|
||||
});
|
||||
|
||||
const task = enqueueCommand(async () => {
|
||||
await blocker;
|
||||
});
|
||||
|
||||
// Give the event loop a tick for the task to start.
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
expect(getActiveTaskCount()).toBe(1);
|
||||
|
||||
resolve1();
|
||||
await task;
|
||||
expect(getActiveTaskCount()).toBe(0);
|
||||
});
|
||||
|
||||
it("waitForActiveTasks resolves immediately when no tasks are active", async () => {
|
||||
const { drained } = await waitForActiveTasks(1000);
|
||||
expect(drained).toBe(true);
|
||||
});
|
||||
|
||||
it("waitForActiveTasks waits for active tasks to finish", async () => {
|
||||
let resolve1!: () => void;
|
||||
const blocker = new Promise<void>((r) => {
|
||||
resolve1 = r;
|
||||
});
|
||||
|
||||
const task = enqueueCommand(async () => {
|
||||
await blocker;
|
||||
});
|
||||
|
||||
// Give the task a tick to start.
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
|
||||
const drainPromise = waitForActiveTasks(5000);
|
||||
|
||||
// Resolve the blocker after a short delay.
|
||||
setTimeout(() => resolve1(), 50);
|
||||
|
||||
const { drained } = await drainPromise;
|
||||
expect(drained).toBe(true);
|
||||
|
||||
await task;
|
||||
});
|
||||
|
||||
it("waitForActiveTasks returns drained=false on timeout", async () => {
|
||||
let resolve1!: () => void;
|
||||
const blocker = new Promise<void>((r) => {
|
||||
resolve1 = r;
|
||||
});
|
||||
|
||||
const task = enqueueCommand(async () => {
|
||||
await blocker;
|
||||
});
|
||||
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
|
||||
const { drained } = await waitForActiveTasks(50);
|
||||
expect(drained).toBe(false);
|
||||
|
||||
resolve1();
|
||||
await task;
|
||||
});
|
||||
|
||||
it("waitForActiveTasks ignores tasks that start after the call", async () => {
|
||||
const lane = `drain-snapshot-${Date.now()}-${Math.random().toString(16).slice(2)}`;
|
||||
setCommandLaneConcurrency(lane, 2);
|
||||
|
||||
let resolve1!: () => void;
|
||||
const blocker1 = new Promise<void>((r) => {
|
||||
resolve1 = r;
|
||||
});
|
||||
let resolve2!: () => void;
|
||||
const blocker2 = new Promise<void>((r) => {
|
||||
resolve2 = r;
|
||||
});
|
||||
|
||||
const first = enqueueCommandInLane(lane, async () => {
|
||||
await blocker1;
|
||||
});
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
|
||||
const drainPromise = waitForActiveTasks(2000);
|
||||
|
||||
// Starts after waitForActiveTasks snapshot and should not block drain completion.
|
||||
const second = enqueueCommandInLane(lane, async () => {
|
||||
await blocker2;
|
||||
});
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
expect(getActiveTaskCount()).toBeGreaterThanOrEqual(2);
|
||||
|
||||
resolve1();
|
||||
const { drained } = await drainPromise;
|
||||
expect(drained).toBe(true);
|
||||
|
||||
resolve2();
|
||||
await Promise.all([first, second]);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user