diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b8fe0e278b..565b1f1156f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -53,6 +53,7 @@ Docs: https://docs.openclaw.ai
 - Agents/Ollama: preserve unsafe integer tool-call arguments as exact strings during NDJSON parsing, preventing large numeric IDs from being rounded before tool execution. (#23170) Thanks @BestJoester.
 - Cron/Gateway: keep `cron.list` and `cron.status` responsive during startup catch-up by avoiding a long-held cron lock while missed jobs execute. (#23106) Thanks @jayleekr.
 - Gateway/Config reload: compare array-valued config paths structurally during diffing so unchanged `memory.qmd.paths` and `memory.qmd.scope.rules` no longer trigger false restart-required reloads. (#23185) Thanks @rex05ai.
+- Gateway/Config reload: retry short-lived missing config snapshots during reload before skipping, preventing atomic-write unlink windows from triggering restart loops. (#23343) Thanks @lbo728.
 - Cron/Scheduling: validate runtime cron expressions before schedule/stagger evaluation so malformed persisted jobs report a clear `invalid cron schedule: expr is required` error instead of crashing with `undefined.trim` failures and auto-disable churn. (#23223) Thanks @asimons81.
 - Memory/QMD: migrate legacy unscoped collection bindings (for example `memory-root`) to per-agent scoped names (for example `memory-root-main`) during startup when safe, so QMD-backed `memory_search` no longer fails with `Collection not found` after upgrades. (#23228, #20727) Thanks @JLDynamics and @AaronFaby.
 - Memory/QMD: normalize Han-script BM25 search queries before invoking `qmd search` so mixed CJK+Latin prompts no longer return empty results due to tokenizer mismatch. (#23426) Thanks @LunaLee0130.
diff --git a/src/gateway/config-reload.test.ts b/src/gateway/config-reload.test.ts
index d81c4cf7d1a..2711eafd71c 100644
--- a/src/gateway/config-reload.test.ts
+++ b/src/gateway/config-reload.test.ts
@@ -1,12 +1,15 @@
-import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import chokidar from "chokidar";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import { listChannelPlugins } from "../channels/plugins/index.js";
 import type { ChannelPlugin } from "../channels/plugins/types.js";
+import type { ConfigFileSnapshot } from "../config/config.js";
 import { setActivePluginRegistry } from "../plugins/runtime.js";
 import { createTestRegistry } from "../test-utils/channel-plugins.js";
 import {
   buildGatewayReloadPlan,
   diffConfigPaths,
   resolveGatewayReloadSettings,
+  startGatewayConfigReloader,
 } from "./config-reload.js";
 
 describe("diffConfigPaths", () => {
@@ -163,3 +166,142 @@ describe("resolveGatewayReloadSettings", () => {
     expect(settings.debounceMs).toBe(300);
   });
 });
+
+type WatcherHandler = () => void;
+type WatcherEvent = "add" | "change" | "unlink" | "error";
+
+/**
+ * Minimal stand-in for a chokidar watcher: `on` records handlers per event,
+ * `emit` invokes them synchronously, and `close` is a spy-able async no-op.
+ */
+function createWatcherMock() {
+  const handlers = new Map<WatcherEvent, WatcherHandler[]>();
+  return {
+    on(event: WatcherEvent, handler: WatcherHandler) {
+      const existing = handlers.get(event) ?? [];
+      existing.push(handler);
+      handlers.set(event, existing);
+      return this;
+    },
+    emit(event: WatcherEvent) {
+      for (const handler of handlers.get(event) ?? []) {
+        handler();
+      }
+    },
+    close: vi.fn(async () => {}),
+  };
+}
+
+/**
+ * Builds a snapshot of an existing, valid, empty config at /tmp/openclaw.json;
+ * fields in `partial` override the defaults.
+ */
+function makeSnapshot(partial: Partial<ConfigFileSnapshot> = {}): ConfigFileSnapshot {
+  return {
+    path: "/tmp/openclaw.json",
+    exists: true,
+    raw: "{}",
+    parsed: {},
+    resolved: {},
+    valid: true,
+    config: {},
+    issues: [],
+    warnings: [],
+    legacyIssues: [],
+    ...partial,
+  };
+}
+
+describe("startGatewayConfigReloader", () => {
+  beforeEach(() => {
+    vi.useFakeTimers();
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+    vi.restoreAllMocks();
+  });
+
+  it("retries missing snapshots and reloads once config file reappears", async () => {
+    const watcher = createWatcherMock();
+    vi.spyOn(chokidar, "watch").mockReturnValue(watcher as unknown as never);
+
+    const readSnapshot = vi
+      .fn<() => Promise<ConfigFileSnapshot>>()
+      .mockResolvedValueOnce(makeSnapshot({ exists: false, raw: null, hash: "missing-1" }))
+      .mockResolvedValueOnce(
+        makeSnapshot({
+          config: {
+            gateway: { reload: { debounceMs: 0 } },
+            hooks: { enabled: true },
+          },
+          hash: "next-1",
+        }),
+      );
+
+    const onHotReload = vi.fn(async () => {});
+    const onRestart = vi.fn();
+    const log = {
+      info: vi.fn(),
+      warn: vi.fn(),
+      error: vi.fn(),
+    };
+
+    const reloader = startGatewayConfigReloader({
+      initialConfig: { gateway: { reload: { debounceMs: 0 } } },
+      readSnapshot,
+      onHotReload,
+      onRestart,
+      log,
+      watchPath: "/tmp/openclaw.json",
+    });
+
+    watcher.emit("unlink");
+    await vi.runOnlyPendingTimersAsync();
+    await vi.advanceTimersByTimeAsync(150);
+
+    expect(readSnapshot).toHaveBeenCalledTimes(2);
+    expect(onHotReload).toHaveBeenCalledTimes(1);
+    expect(onRestart).not.toHaveBeenCalled();
+    expect(log.info).toHaveBeenCalledWith("config reload retry (1/2): config file not found");
+    expect(log.warn).not.toHaveBeenCalledWith("config reload skipped (config file not found)");
+
+    await reloader.stop();
+  });
+
+  it("caps missing-file retries and skips reload after retry budget is exhausted", async () => {
+    const watcher = createWatcherMock();
+    vi.spyOn(chokidar, "watch").mockReturnValue(watcher as unknown as never);
+
+    const readSnapshot = vi
+      .fn<() => Promise<ConfigFileSnapshot>>()
+      .mockResolvedValue(makeSnapshot({ exists: false, raw: null, hash: "missing" }));
+
+    const onHotReload = vi.fn(async () => {});
+    const onRestart = vi.fn();
+    const log = {
+      info: vi.fn(),
+      warn: vi.fn(),
+      error: vi.fn(),
+    };
+
+    const reloader = startGatewayConfigReloader({
+      initialConfig: { gateway: { reload: { debounceMs: 0 } } },
+      readSnapshot,
+      onHotReload,
+      onRestart,
+      log,
+      watchPath: "/tmp/openclaw.json",
+    });
+
+    watcher.emit("unlink");
+    await vi.runAllTimersAsync();
+
+    expect(readSnapshot).toHaveBeenCalledTimes(3);
+    expect(onHotReload).not.toHaveBeenCalled();
+    expect(onRestart).not.toHaveBeenCalled();
+    expect(log.warn).toHaveBeenCalledWith("config reload skipped (config file not found)");
+
+    await reloader.stop();
+  });
+});
diff --git a/src/gateway/config-reload.ts b/src/gateway/config-reload.ts
index 8c3f7c231b8..f92e7440181 100644
--- a/src/gateway/config-reload.ts
+++ b/src/gateway/config-reload.ts
@@ -44,6 +44,10 @@ const DEFAULT_RELOAD_SETTINGS: GatewayReloadSettings = {
   mode: "hybrid",
   debounceMs: 300,
 };
+// Delay before re-reading the config when the snapshot reports the file missing.
+const MISSING_CONFIG_RETRY_DELAY_MS = 150;
+// Maximum re-reads of a missing config before the reload is skipped.
+const MISSING_CONFIG_MAX_RETRIES = 2;
 
 const BASE_RELOAD_RULES: ReloadRule[] = [
   { prefix: "gateway.remote", kind: "none" },
@@ -268,19 +272,22 @@ export function startGatewayConfigReloader(opts: {
   let running = false;
   let stopped = false;
   let restartQueued = false;
+  let missingConfigRetries = 0;
 
-  const schedule = () => {
+  const scheduleAfter = (wait: number) => {
     if (stopped) {
       return;
     }
     if (debounceTimer) {
       clearTimeout(debounceTimer);
     }
-    const wait = settings.debounceMs;
     debounceTimer = setTimeout(() => {
       void runReload();
     }, wait);
   };
+  const schedule = () => {
+    scheduleAfter(settings.debounceMs);
+  };
 
   const runReload = async () => {
     if (stopped) {
@@ -298,9 +305,20 @@ export function startGatewayConfigReloader(opts: {
     try {
       const snapshot = await opts.readSnapshot();
       if (!snapshot.exists) {
-        opts.log.warn("config reload skipped (config file not found; may be mid-write)");
+        // The file may be briefly absent (e.g. mid-write); re-read after a short delay.
+        if (missingConfigRetries < MISSING_CONFIG_MAX_RETRIES) {
+          missingConfigRetries += 1;
+          opts.log.info(
+            `config reload retry (${missingConfigRetries}/${MISSING_CONFIG_MAX_RETRIES}): config file not found`,
+          );
+          scheduleAfter(MISSING_CONFIG_RETRY_DELAY_MS);
+          return;
+        }
+        // Retry budget spent; the counter is only reset by a later successful read.
+        opts.log.warn("config reload skipped (config file not found)");
         return;
       }
+      missingConfigRetries = 0;
       if (!snapshot.valid) {
         const issues = snapshot.issues.map((issue) => `${issue.path}: ${issue.message}`).join(", ");
         opts.log.warn(`config reload skipped (invalid config): ${issues}`);