diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c15945205a..5c99c2e6c42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Gateway/startup: start cron and record the post-ready memory trace even when deferred maintenance timers fail after readiness, so a non-fatal timer setup issue does not silently leave scheduled jobs idle. Thanks @vincentkoc. - Agents/session status: keep semantic `session_status({ sessionKey: "current" })` on the live run session even before that run has a persisted session-store entry, instead of falling back to the sandbox policy key. Thanks @vincentkoc. - QA/Slack: resolve bundled official plugin public-surface package aliases during source-mode QA runs, so release Slack live validation can load `@openclaw/slack/api.js` without workspace symlinks. Thanks @vincentkoc. - Codex: pass the live run session key into app-server dynamic tools when sandbox policy uses a separate session key, so `session_status({ sessionKey: "current" })` reports the active run instead of the sandbox policy key. Thanks @vincentkoc. 
diff --git a/src/gateway/server-runtime-services.test.ts b/src/gateway/server-runtime-services.test.ts index 1c1b81f010e..d9e5843429a 100644 --- a/src/gateway/server-runtime-services.test.ts +++ b/src/gateway/server-runtime-services.test.ts @@ -52,8 +52,11 @@ vi.mock("./model-pricing-cache.js", () => ({ startGatewayModelPricingRefresh: hoisted.startGatewayModelPricingRefresh, })); -const { activateGatewayScheduledServices, startGatewayRuntimeServices } = - await import("./server-runtime-services.js"); +const { + activateGatewayScheduledServices, + runGatewayPostReadyMaintenance, + startGatewayRuntimeServices, +} = await import("./server-runtime-services.js"); describe("server-runtime-services", () => { beforeEach(() => { @@ -217,6 +220,31 @@ describe("server-runtime-services", () => { expect(hoisted.recoverPendingDeliveries).toHaveBeenCalledTimes(1); }); + it("starts cron and records memory when post-ready maintenance fails", async () => { + const cron = { start: vi.fn(async () => undefined) }; + const log = createLog(); + const recordPostReadyMemory = vi.fn(); + + await runGatewayPostReadyMaintenance({ + startMaintenance: vi.fn(async () => { + throw new Error("timers unavailable"); + }), + applyMaintenance: vi.fn(), + shouldStartCron: () => true, + markCronStartHandled: vi.fn(), + cron, + logCron: { error: vi.fn() }, + log, + recordPostReadyMemory, + }); + + expect(log.warn).toHaveBeenCalledWith( + "gateway post-ready maintenance startup failed: Error: timers unavailable", + ); + expect(cron.start).toHaveBeenCalledTimes(1); + expect(recordPostReadyMemory).toHaveBeenCalledTimes(1); + }); + it("keeps scheduled services disabled for minimal test gateways", () => { const cron = { start: vi.fn(async () => undefined) }; @@ -247,6 +275,7 @@ function createLog() { warn: vi.fn(), error: vi.fn(), })), + warn: vi.fn(), error: vi.fn(), }; } diff --git a/src/gateway/server-runtime-services.ts b/src/gateway/server-runtime-services.ts index df927da8b9c..b01efad65d4 100644 --- 
type GatewayPostReadyLogger = {
  warn: (message: string) => void;
};

/**
 * Runs the deferred post-ready gateway steps in order: start maintenance,
 * start cron (when still pending), then record the post-ready memory trace.
 *
 * Each step is isolated: a failure is logged and the remaining steps still
 * run. The function is meant to be safe to fire-and-forget (`void fn(...)`)
 * from a timer callback, so step failures are reported through the supplied
 * loggers instead of rejecting the returned promise.
 *
 * @param params.startMaintenance Starts the maintenance timers; a truthy
 *   result is forwarded to `applyMaintenance`.
 * @param params.applyMaintenance Receives the handles returned by
 *   `startMaintenance`.
 * @param params.shouldStartCron Returns true while cron start is still pending.
 * @param params.markCronStartHandled Invoked right before cron is started.
 * @param params.cron Cron service to start.
 * @param params.logCron Receives cron start failures.
 * @param params.log Receives maintenance and memory-trace failures as warnings.
 * @param params.recordPostReadyMemory Records the post-ready memory trace.
 * @returns A promise that resolves once all three steps have been attempted.
 */
export async function runGatewayPostReadyMaintenance<TMaintenance>(params: {
  startMaintenance: () => Promise<TMaintenance | null | undefined>;
  applyMaintenance: (maintenance: TMaintenance) => void;
  shouldStartCron: () => boolean;
  markCronStartHandled: () => void;
  cron: { start: () => Promise<unknown> };
  logCron: { error: (message: string) => void };
  log: GatewayPostReadyLogger;
  recordPostReadyMemory: () => void;
}): Promise<void> {
  try {
    const maintenance = await params.startMaintenance();
    if (maintenance) {
      params.applyMaintenance(maintenance);
    }
  } catch (err) {
    params.log.warn(`gateway post-ready maintenance startup failed: ${String(err)}`);
  }

  try {
    if (params.shouldStartCron()) {
      params.markCronStartHandled();
      startGatewayCronWithLogging({
        cron: params.cron,
        logCron: params.logCron,
      });
    }
  } catch (err) {
    // startGatewayCronWithLogging only handles a rejected start() promise; a
    // synchronous throw (or a throwing callback) would otherwise escape here
    // and skip the memory trace below.
    params.logCron.error(`failed to start: ${String(err)}`);
  }

  try {
    params.recordPostReadyMemory();
  } catch (err) {
    params.log.warn(`gateway post-ready memory trace failed: ${String(err)}`);
  }
}
a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index 3853400d1a5..b6f0e23dea1 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -1491,24 +1491,24 @@ export async function startGatewayServer( }); if (!minimalTestGateway) { const handle = setTimeout(() => { - void (async () => { - const maintenance = await earlyRuntime.startMaintenance(); - if (maintenance) { + void gatewayRuntimeServices.runGatewayPostReadyMaintenance({ + startMaintenance: earlyRuntime.startMaintenance, + applyMaintenance: (maintenance) => { runtimeState.tickInterval = maintenance.tickInterval; runtimeState.healthInterval = maintenance.healthInterval; runtimeState.dedupeCleanup = maintenance.dedupeCleanup; runtimeState.mediaCleanup = maintenance.mediaCleanup; - } - if (!gatewayCronStartHandled) { + }, + shouldStartCron: () => !gatewayCronStartHandled, + markCronStartHandled: () => { gatewayCronStartHandled = true; - gatewayRuntimeServices.startGatewayCronWithLogging({ - cron: runtimeState.cronState.cron, - logCron, - }); - } - startupTrace.detail("memory.post-ready", collectProcessMemoryUsageMb()); - })().catch((err) => { - log.warn(`gateway post-ready maintenance startup failed: ${String(err)}`); + }, + cron: runtimeState.cronState.cron, + logCron, + log, + recordPostReadyMemory: () => { + startupTrace.detail("memory.post-ready", collectProcessMemoryUsageMb()); + }, }); }, POST_READY_MAINTENANCE_DELAY_MS); handle.unref?.();