diff --git a/src/gateway/server-runtime-services.ts b/src/gateway/server-runtime-services.ts index 39376b6c696..5da78182f0f 100644 --- a/src/gateway/server-runtime-services.ts +++ b/src/gateway/server-runtime-services.ts @@ -71,35 +71,22 @@ export function startGatewayRuntimeServices(params: { minimalTestGateway: boolean; cfgAtStart: OpenClawConfig; channelManager: GatewayChannelManager; - cron: { start: () => Promise }; - logCron: { error: (message: string) => void }; log: GatewayRuntimeServiceLogger; }): { heartbeatRunner: HeartbeatRunner; channelHealthMonitor: ChannelHealthMonitor | null; stopModelPricingRefresh: () => void; } { - const heartbeatRunner = params.minimalTestGateway - ? createNoopHeartbeatRunner() - : startHeartbeatRunner({ cfg: params.cfgAtStart }); + // Return a noop heartbeat runner for now. The real runner is created + // in activateGatewayScheduledServices() after sidecars finish and + // chat.history becomes available. See #65322. const channelHealthMonitor = startGatewayChannelHealthMonitor({ cfg: params.cfgAtStart, channelManager: params.channelManager, }); - if (!params.minimalTestGateway) { - startGatewayCronWithLogging({ - cron: params.cron, - logCron: params.logCron, - }); - recoverPendingOutboundDeliveries({ - cfg: params.cfgAtStart, - log: params.log, - }); - } - return { - heartbeatRunner, + heartbeatRunner: createNoopHeartbeatRunner(), channelHealthMonitor, stopModelPricingRefresh: !params.minimalTestGateway && process.env.VITEST !== "1" @@ -107,3 +94,36 @@ export function startGatewayRuntimeServices(params: { : () => {}, }; } + 
+/** + * Activate cron scheduler, heartbeat runner, and pending delivery recovery + * AFTER gateway sidecars are fully started and chat.history is available. + * + * Previously these ran inside startGatewayRuntimeServices(), which fires + * before sidecars finish — creating a race where cron/heartbeat jobs + * could call chat.history while it was still marked unavailable. + * See: https://github.com/openclaw/openclaw/issues/65322 + * + * Returns the real heartbeat runner so the caller can update runtimeState. + */ +export function activateGatewayScheduledServices(params: { + minimalTestGateway: boolean; + cfgAtStart: OpenClawConfig; + cron: { start: () => Promise }; + logCron: { error: (message: string) => void }; + log: GatewayRuntimeServiceLogger; +}): { heartbeatRunner: HeartbeatRunner } { + if (params.minimalTestGateway) { + return { heartbeatRunner: createNoopHeartbeatRunner() }; + } + const heartbeatRunner = startHeartbeatRunner({ cfg: params.cfgAtStart }); + startGatewayCronWithLogging({ + cron: params.cron, + logCron: params.logCron, + }); + recoverPendingOutboundDeliveries({ + cfg: params.cfgAtStart, + log: params.log, + }); + return { heartbeatRunner }; +} diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index 528e4577f10..29901b2f5ed 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -56,7 +56,10 @@ import { setFallbackGatewayContextResolver } from "./server-plugins.js"; import { startManagedGatewayConfigReloader } from "./server-reload-handlers.js"; import { createGatewayRequestContext } from "./server-request-context.js"; import { resolveGatewayRuntimeConfig } from "./server-runtime-config.js"; -import { startGatewayRuntimeServices } from "./server-runtime-services.js"; +import { + activateGatewayScheduledServices, + startGatewayRuntimeServices, +} from "./server-runtime-services.js"; import { createGatewayRuntimeState } from 
"./server-runtime-state.js"; import { startGatewayEventSubscriptions } from "./server-runtime-subscriptions.js"; import { resolveSessionKeyForRun } from "./server-session-key.js"; @@ -608,8 +611,6 @@ export async function startGatewayServer( minimalTestGateway, cfgAtStart, channelManager, - cron: runtimeState.cronState.cron, - logCron, log, }), ); @@ -755,6 +756,19 @@ export async function startGatewayServer( unavailableGatewayMethods, })); + // Activate cron scheduler, heartbeat runner, and pending delivery + // recovery now that sidecars are ready and chat.history is available. + // Previously these ran before sidecars finished, causing a race. + // See #65322. + const activated = activateGatewayScheduledServices({ + minimalTestGateway, + cfgAtStart, + cron: runtimeState.cronState.cron, + logCron, + log, + }); + runtimeState.heartbeatRunner = activated.heartbeatRunner; + runtimeState.configReloader = startManagedGatewayConfigReloader({ minimalTestGateway, initialConfig: cfgAtStart,