mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:20:43 +00:00
fix(gateway): keep cron startup after maintenance failure
This commit is contained in:
@@ -47,6 +47,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Gateway/startup: start cron and record the post-ready memory trace even when deferred maintenance timers fail after readiness, so a non-fatal timer setup issue does not silently leave scheduled jobs idle. Thanks @vincentkoc.
|
||||
- Agents/session status: keep semantic `session_status({ sessionKey: "current" })` on the live run session even before that run has a persisted session-store entry, instead of falling back to the sandbox policy key. Thanks @vincentkoc.
|
||||
- QA/Slack: resolve bundled official plugin public-surface package aliases during source-mode QA runs, so release Slack live validation can load `@openclaw/slack/api.js` without workspace symlinks. Thanks @vincentkoc.
|
||||
- Codex: pass the live run session key into app-server dynamic tools when sandbox policy uses a separate session key, so `session_status({ sessionKey: "current" })` reports the active run instead of the sandbox policy key. Thanks @vincentkoc.
|
||||
|
||||
@@ -52,8 +52,11 @@ vi.mock("./model-pricing-cache.js", () => ({
|
||||
startGatewayModelPricingRefresh: hoisted.startGatewayModelPricingRefresh,
|
||||
}));
|
||||
|
||||
const { activateGatewayScheduledServices, startGatewayRuntimeServices } =
|
||||
await import("./server-runtime-services.js");
|
||||
const {
|
||||
activateGatewayScheduledServices,
|
||||
runGatewayPostReadyMaintenance,
|
||||
startGatewayRuntimeServices,
|
||||
} = await import("./server-runtime-services.js");
|
||||
|
||||
describe("server-runtime-services", () => {
|
||||
beforeEach(() => {
|
||||
@@ -217,6 +220,31 @@ describe("server-runtime-services", () => {
|
||||
expect(hoisted.recoverPendingDeliveries).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("starts cron and records memory when post-ready maintenance fails", async () => {
|
||||
const cron = { start: vi.fn(async () => undefined) };
|
||||
const log = createLog();
|
||||
const recordPostReadyMemory = vi.fn();
|
||||
|
||||
await runGatewayPostReadyMaintenance({
|
||||
startMaintenance: vi.fn(async () => {
|
||||
throw new Error("timers unavailable");
|
||||
}),
|
||||
applyMaintenance: vi.fn(),
|
||||
shouldStartCron: () => true,
|
||||
markCronStartHandled: vi.fn(),
|
||||
cron,
|
||||
logCron: { error: vi.fn() },
|
||||
log,
|
||||
recordPostReadyMemory,
|
||||
});
|
||||
|
||||
expect(log.warn).toHaveBeenCalledWith(
|
||||
"gateway post-ready maintenance startup failed: Error: timers unavailable",
|
||||
);
|
||||
expect(cron.start).toHaveBeenCalledTimes(1);
|
||||
expect(recordPostReadyMemory).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("keeps scheduled services disabled for minimal test gateways", () => {
|
||||
const cron = { start: vi.fn(async () => undefined) };
|
||||
|
||||
@@ -247,6 +275,7 @@ function createLog() {
|
||||
warn: vi.fn(),
|
||||
error: vi.fn(),
|
||||
})),
|
||||
warn: vi.fn(),
|
||||
error: vi.fn(),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import type { PluginMetadataRegistryView } from "../plugins/plugin-metadata-snap
|
||||
import type { ChannelHealthMonitor } from "./channel-health-monitor.js";
|
||||
import { startChannelHealthMonitor } from "./channel-health-monitor.js";
|
||||
import { isGatewayModelPricingEnabled } from "./model-pricing-config.js";
|
||||
import type { startGatewayMaintenanceTimers } from "./server-maintenance.js";
|
||||
|
||||
type GatewayRuntimeServiceLogger = {
|
||||
child: (name: string) => {
|
||||
@@ -14,6 +15,12 @@ type GatewayRuntimeServiceLogger = {
|
||||
};
|
||||
error: (message: string) => void;
|
||||
};
|
||||
type GatewayPostReadyLogger = {
|
||||
warn: (message: string) => void;
|
||||
};
|
||||
type GatewayMaintenanceHandles = NonNullable<
|
||||
Awaited<ReturnType<typeof startGatewayMaintenanceTimers>>
|
||||
>;
|
||||
|
||||
export type GatewayChannelManager = Parameters<
|
||||
typeof startChannelHealthMonitor
|
||||
@@ -53,6 +60,34 @@ export function startGatewayCronWithLogging(params: {
|
||||
void params.cron.start().catch((err) => params.logCron.error(`failed to start: ${String(err)}`));
|
||||
}
|
||||
|
||||
/**
 * Runs the post-ready gateway steps in order: start maintenance, start cron
 * (when still pending), then record the post-ready memory trace.
 *
 * Each step is guarded independently, so a failure in one step is logged and
 * the remaining steps still run. The returned promise never rejects, which
 * keeps fire-and-forget call sites (`void runGatewayPostReadyMaintenance(...)`)
 * from surfacing an unhandled rejection.
 *
 * @param params.startMaintenance - Starts maintenance; may resolve to `null`, in which case nothing is applied.
 * @param params.applyMaintenance - Receives the non-null maintenance handles.
 * @param params.shouldStartCron - Returns `true` when cron still has to be started by this call.
 * @param params.markCronStartHandled - Invoked right before cron is started.
 * @param params.cron - Cron service whose `start()` is invoked without being awaited.
 * @param params.logCron - Receives cron start failures.
 * @param params.log - Receives maintenance and memory-trace failures as warnings.
 * @param params.recordPostReadyMemory - Invoked last, regardless of earlier failures.
 */
export async function runGatewayPostReadyMaintenance(params: {
  startMaintenance: () => Promise<GatewayMaintenanceHandles | null>;
  applyMaintenance: (maintenance: GatewayMaintenanceHandles) => void;
  shouldStartCron: () => boolean;
  markCronStartHandled: () => void;
  cron: { start: () => Promise<void> };
  logCron: { error: (message: string) => void };
  log: GatewayPostReadyLogger;
  recordPostReadyMemory: () => void;
}): Promise<void> {
  // Step 1: maintenance. A failure here must not block cron or the memory trace.
  try {
    const maintenance = await params.startMaintenance();
    if (maintenance) {
      params.applyMaintenance(maintenance);
    }
  } catch (err) {
    params.log.warn(`gateway post-ready maintenance startup failed: ${String(err)}`);
  }

  // Step 2: cron. Asynchronous start failures are already logged by
  // startGatewayCronWithLogging; this guard reports synchronous throws from
  // the cron step in the same message format instead of rejecting.
  try {
    if (params.shouldStartCron()) {
      params.markCronStartHandled();
      startGatewayCronWithLogging({
        cron: params.cron,
        logCron: params.logCron,
      });
    }
  } catch (err) {
    params.logCron.error(`failed to start: ${String(err)}`);
  }

  // Step 3: memory trace. Always attempted; a failing trace is only a warning.
  try {
    params.recordPostReadyMemory();
  } catch (err) {
    params.log.warn(`gateway post-ready memory trace failed: ${String(err)}`);
  }
}
|
||||
|
||||
function recoverPendingOutboundDeliveries(params: {
|
||||
cfg: OpenClawConfig;
|
||||
log: GatewayRuntimeServiceLogger;
|
||||
|
||||
@@ -1491,24 +1491,24 @@ export async function startGatewayServer(
|
||||
});
|
||||
if (!minimalTestGateway) {
|
||||
const handle = setTimeout(() => {
|
||||
void (async () => {
|
||||
const maintenance = await earlyRuntime.startMaintenance();
|
||||
if (maintenance) {
|
||||
void gatewayRuntimeServices.runGatewayPostReadyMaintenance({
|
||||
startMaintenance: earlyRuntime.startMaintenance,
|
||||
applyMaintenance: (maintenance) => {
|
||||
runtimeState.tickInterval = maintenance.tickInterval;
|
||||
runtimeState.healthInterval = maintenance.healthInterval;
|
||||
runtimeState.dedupeCleanup = maintenance.dedupeCleanup;
|
||||
runtimeState.mediaCleanup = maintenance.mediaCleanup;
|
||||
}
|
||||
if (!gatewayCronStartHandled) {
|
||||
},
|
||||
shouldStartCron: () => !gatewayCronStartHandled,
|
||||
markCronStartHandled: () => {
|
||||
gatewayCronStartHandled = true;
|
||||
gatewayRuntimeServices.startGatewayCronWithLogging({
|
||||
cron: runtimeState.cronState.cron,
|
||||
logCron,
|
||||
});
|
||||
}
|
||||
startupTrace.detail("memory.post-ready", collectProcessMemoryUsageMb());
|
||||
})().catch((err) => {
|
||||
log.warn(`gateway post-ready maintenance startup failed: ${String(err)}`);
|
||||
},
|
||||
cron: runtimeState.cronState.cron,
|
||||
logCron,
|
||||
log,
|
||||
recordPostReadyMemory: () => {
|
||||
startupTrace.detail("memory.post-ready", collectProcessMemoryUsageMb());
|
||||
},
|
||||
});
|
||||
}, POST_READY_MAINTENANCE_DELAY_MS);
|
||||
handle.unref?.();
|
||||
|
||||
Reference in New Issue
Block a user