fix: defer gateway scheduled services (#65365) (thanks @lml2468)

This commit is contained in:
Peter Steinberger
2026-04-12 17:02:12 -07:00
parent 92776b8d77
commit 6a7961736a
3 changed files with 130 additions and 20 deletions

View File

@@ -8,6 +8,8 @@ Docs: https://docs.openclaw.ai
### Fixes
- Gateway/startup: defer heartbeat, cron, and pending delivery recovery until sidecars finish so Sandbox wake and chat history startup gates cannot block channel resume. (#65365) Thanks @lml2468.
## 2026.4.12-beta.1
### Changes

View File

@@ -0,0 +1,126 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Mock handles are built inside vi.hoisted so that the vi.mock factories in
// this file can reference them.
const hoisted = vi.hoisted(() => {
  // One shared runner stub: tests assert on its identity and on whether its
  // methods were invoked.
  const stop = vi.fn();
  const updateConfig = vi.fn();
  const heartbeatRunner = { stop, updateConfig };

  // Service starters and delivery helpers swapped in for the real modules.
  const startHeartbeatRunner = vi.fn(() => heartbeatRunner);
  const startChannelHealthMonitor = vi.fn(() => ({ stop: vi.fn() }));
  const startGatewayModelPricingRefresh = vi.fn(() => vi.fn());
  const recoverPendingDeliveries = vi.fn(async () => undefined);
  const deliverOutboundPayloads = vi.fn();

  return {
    heartbeatRunner,
    startHeartbeatRunner,
    startChannelHealthMonitor,
    startGatewayModelPricingRefresh,
    recoverPendingDeliveries,
    deliverOutboundPayloads,
  };
});
// Replace the heartbeat runner module with the hoisted stub starter.
vi.mock("../infra/heartbeat-runner.js", () => ({
startHeartbeatRunner: hoisted.startHeartbeatRunner,
}));
// Replace outbound delivery so recovery can be asserted without real sends.
vi.mock("../infra/outbound/deliver.js", () => ({
deliverOutboundPayloads: hoisted.deliverOutboundPayloads,
}));
// Replace the delivery queue so pending-delivery recovery is observable.
vi.mock("../infra/outbound/delivery-queue.js", () => ({
recoverPendingDeliveries: hoisted.recoverPendingDeliveries,
}));
// Replace the channel health monitor starter with a stub.
vi.mock("./channel-health-monitor.js", () => ({
startChannelHealthMonitor: hoisted.startChannelHealthMonitor,
}));
// Replace the model pricing refresh starter with a stub.
vi.mock("./model-pricing-cache.js", () => ({
startGatewayModelPricingRefresh: hoisted.startGatewayModelPricingRefresh,
}));
// Load the module under test dynamically, after the mocks above are declared.
const { activateGatewayScheduledServices, startGatewayRuntimeServices } =
await import("./server-runtime-services.js");
// Verifies that scheduled work (heartbeat, cron, pending delivery recovery)
// stays inert during initial runtime setup and only starts on activation.
describe("server-runtime-services", () => {
  beforeEach(() => {
    // Reset call history on every shared mock so tests stay independent.
    const sharedMocks = [
      hoisted.heartbeatRunner.stop,
      hoisted.heartbeatRunner.updateConfig,
      hoisted.startHeartbeatRunner,
      hoisted.startChannelHealthMonitor,
      hoisted.startGatewayModelPricingRefresh,
      hoisted.recoverPendingDeliveries,
      hoisted.deliverOutboundPayloads,
    ];
    for (const mock of sharedMocks) {
      mock.mockClear();
    }
  });

  it("keeps scheduled services inert during initial runtime setup", () => {
    const channelManagerStub = {
      getRuntimeSnapshot: vi.fn(),
      isHealthMonitorEnabled: vi.fn(),
      isManuallyStopped: vi.fn(),
    };

    const { heartbeatRunner } = startGatewayRuntimeServices({
      minimalTestGateway: false,
      cfgAtStart: {} as never,
      channelManager: channelManagerStub as never,
      log: createLog(),
    });

    // Only the health monitor starts up front; scheduled work is deferred.
    expect(hoisted.startChannelHealthMonitor).toHaveBeenCalledTimes(1);
    expect(hoisted.startHeartbeatRunner).not.toHaveBeenCalled();
    expect(hoisted.recoverPendingDeliveries).not.toHaveBeenCalled();

    // The runner handed back here must not be the real (mocked) runner.
    heartbeatRunner.stop();
    expect(hoisted.heartbeatRunner.stop).not.toHaveBeenCalled();
  });

  it("activates heartbeat, cron, and delivery recovery after sidecars are ready", async () => {
    const cronStub = { start: vi.fn(async () => undefined) };
    const cronLogStub = { error: vi.fn() };

    const activated = activateGatewayScheduledServices({
      minimalTestGateway: false,
      cfgAtStart: {} as never,
      cron: cronStub,
      logCron: cronLogStub,
      log: createLog(),
    });

    expect(hoisted.startHeartbeatRunner).toHaveBeenCalledTimes(1);
    expect(cronStub.start).toHaveBeenCalledTimes(1);
    expect(activated.heartbeatRunner).toBe(hoisted.heartbeatRunner);

    // Recovery is asserted by polling until the expected call is observed.
    const expectedRecoveryArgs = expect.objectContaining({
      deliver: hoisted.deliverOutboundPayloads,
      cfg: {},
    });
    await vi.waitFor(() => {
      expect(hoisted.recoverPendingDeliveries).toHaveBeenCalledWith(expectedRecoveryArgs);
    });
  });

  it("keeps scheduled services disabled for minimal test gateways", () => {
    const cronStub = { start: vi.fn(async () => undefined) };
    const cronLogStub = { error: vi.fn() };

    const { heartbeatRunner } = activateGatewayScheduledServices({
      minimalTestGateway: true,
      cfgAtStart: {} as never,
      cron: cronStub,
      logCron: cronLogStub,
      log: createLog(),
    });

    // Nothing scheduled may start when the gateway is in minimal test mode.
    expect(hoisted.startHeartbeatRunner).not.toHaveBeenCalled();
    expect(cronStub.start).not.toHaveBeenCalled();
    expect(hoisted.recoverPendingDeliveries).not.toHaveBeenCalled();

    heartbeatRunner.stop();
    expect(hoisted.heartbeatRunner.stop).not.toHaveBeenCalled();
  });
});
/**
 * Builds a stub logger for the tests: `child()` yields a fresh set of
 * info/warn/error mocks on each call, and the root exposes an `error` mock.
 */
function createLog() {
  const makeChildLogger = () => ({
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
  });

  return {
    child: vi.fn(makeChildLogger),
    error: vi.fn(),
  };
}

View File

@@ -77,9 +77,7 @@ export function startGatewayRuntimeServices(params: {
channelHealthMonitor: ChannelHealthMonitor | null;
stopModelPricingRefresh: () => void;
} {
// Return a noop heartbeat runner for now. The real runner is created
// in activateGatewayScheduledServices() after sidecars finish and
// chat.history becomes available. See #65322.
// Keep scheduled work inert until post-attach sidecars finish.
const channelHealthMonitor = startGatewayChannelHealthMonitor({
cfg: params.cfgAtStart,
channelManager: params.channelManager,
@@ -95,25 +93,9 @@ export function startGatewayRuntimeServices(params: {
};
}
/**
* Activate cron scheduler and pending delivery recovery AFTER gateway
* sidecars are fully started and chat.history is available.
*
* Previously these ran inside startGatewayRuntimeServices(), which
* fires before sidecars finish — creating a race where cron/heartbeat
* jobs could call chat.history while it was still marked unavailable.
* See: https://github.com/openclaw/openclaw/issues/65322
*/
/**
* Activate cron scheduler, heartbeat runner, and pending delivery recovery
* AFTER gateway sidecars are fully started and chat.history is available.
*
* Previously these ran inside startGatewayRuntimeServices(), which fires
* before sidecars finish — creating a race where cron/heartbeat jobs
* could call chat.history while it was still marked unavailable.
* See: https://github.com/openclaw/openclaw/issues/65322
*
* Returns the real heartbeat runner so the caller can update runtimeState.
* after gateway sidecars are fully started and chat.history is available.
*/
export function activateGatewayScheduledServices(params: {
minimalTestGateway: boolean;