From f369939fedd5f33be7ec5eb41d8f53893619c9f3 Mon Sep 17 00:00:00 2001 From: Josh Lehman Date: Fri, 24 Apr 2026 17:48:35 -0700 Subject: [PATCH] fix: avoid plugin-local config recovery rollback (#71289) --- src/config/config.ts | 1 + src/config/io.observe-recovery.test.ts | 115 ++++++++++++++++++ src/config/io.observe-recovery.ts | 12 ++ src/config/recovery-policy.ts | 35 ++++++ src/gateway/config-reload.test.ts | 56 +++++++++ src/gateway/config-reload.ts | 7 ++ .../server-startup-config.recovery.test.ts | 66 ++++++++++ src/gateway/server-startup-config.ts | 17 ++- 8 files changed, 305 insertions(+), 4 deletions(-) create mode 100644 src/config/recovery-policy.ts diff --git a/src/config/config.ts b/src/config/config.ts index c235593e45a..79bc92817e2 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -28,6 +28,7 @@ export { export type { ConfigWriteNotification } from "./io.js"; export { ConfigMutationConflictError, mutateConfigFile, replaceConfigFile } from "./mutate.js"; export * from "./paths.js"; +export * from "./recovery-policy.js"; export * from "./runtime-overrides.js"; export * from "./types.js"; export { diff --git a/src/config/io.observe-recovery.test.ts b/src/config/io.observe-recovery.test.ts index b5f85c09bcb..49b22fa498d 100644 --- a/src/config/io.observe-recovery.test.ts +++ b/src/config/io.observe-recovery.test.ts @@ -347,6 +347,121 @@ describe("config observe recovery", () => { }); }); + it("does not restore stale last-known-good for plugin schema evolution issues", async () => { + await withSuiteHome(async (home) => { + const { deps, configPath, warn } = makeDeps(home); + const staleSnapshot = await makeSnapshot(configPath, { + gateway: { mode: "local" }, + agents: { defaults: { model: "sonnet-4.6" } }, + plugins: { + entries: { + "lossless-claw": { + enabled: true, + config: { compactionMode: "legacy" }, + }, + }, + }, + }); + await expect( + promoteConfigSnapshotToLastKnownGood({ + deps, + snapshot: staleSnapshot, + logger: deps.logger, + }), + ).resolves.toBe(true); + + const activeConfig = { + gateway: { mode: "local" }, + agents: { defaults: { model: "gpt-5.4" } }, + plugins: { + entries: { + "lossless-claw": { + enabled: true, + config: { compactionMode: "adaptive", cacheAwareCompaction: true }, + }, + }, + }, + }; + const active = await writeConfigRaw(configPath, activeConfig); + const restored = await recoverConfigFromLastKnownGood({ + deps, + snapshot: { + ...staleSnapshot, + raw: active.raw, + parsed: active.parsed, + valid: false, + issues: [ + { + path: "plugins.entries.lossless-claw.config.cacheAwareCompaction", + message: "invalid config: must NOT have additional properties", + }, + ], + }, + reason: "reload-invalid-config", + }); + + expect(restored).toBe(false); + await expect(fsp.readFile(configPath, "utf-8")).resolves.toBe(active.raw); + expect(warn).toHaveBeenCalledWith( + expect.stringContaining("Config last-known-good recovery skipped"), + ); + }); + }); + + it("does not restore stale last-known-good for plugin minHostVersion skew issues", async () => { + await withSuiteHome(async (home) => { + const { deps, configPath } = makeDeps(home); + const staleSnapshot = await makeSnapshot(configPath, { + gateway: { mode: "local" }, + plugins: { + entries: { + feishu: { enabled: false }, + }, + }, + }); + await expect( + promoteConfigSnapshotToLastKnownGood({ + deps, + snapshot: staleSnapshot, + logger: deps.logger, + }), + ).resolves.toBe(true); + + const activeConfig = { + gateway: { mode: "local" }, + agents: { defaults: { model: "gpt-5.4" } }, + plugins: { + entries: { + feishu: { enabled: true, config: { appId: "feishu-app" } }, + whatsapp: { enabled: true, config: { account: "primary" } }, + }, + }, + }; + const active = await writeConfigRaw(configPath, activeConfig); + const restored = await recoverConfigFromLastKnownGood({ + deps, + snapshot: { + ...staleSnapshot, + raw: active.raw, + parsed: active.parsed, + valid: false, + issues: [ + { + path: "plugins.entries.feishu", + message: + "plugin feishu: plugin requires OpenClaw >=2026.4.23, but this host is 2026.4.22; skipping load", + }, + ], + }, + reason: "reload-invalid-config", + }); + + expect(restored).toBe(false); + await expect(fsp.readFile(configPath, "utf-8")).resolves.toBe(active.raw); + expect(JSON5.parse(active.raw)).toEqual(activeConfig); + }); + }); + it("refuses to promote redacted secret placeholders", async () => { await withSuiteHome(async (home) => { const warn = vi.fn(); diff --git a/src/config/io.observe-recovery.ts b/src/config/io.observe-recovery.ts index 952ee936556..d844197efff 100644 --- a/src/config/io.observe-recovery.ts +++ b/src/config/io.observe-recovery.ts @@ -7,6 +7,10 @@ import { type ConfigObserveAuditRecord, } from "./io.audit.js"; import { resolveStateDir } from "./paths.js"; +import { + isPluginLocalInvalidConfigSnapshot, + shouldAttemptLastKnownGoodRecovery, +} from "./recovery-policy.js"; import type { ConfigFileSnapshot } from "./types.openclaw.js"; export type ObserveRecoveryDeps = { @@ -1014,6 +1018,14 @@ export async function recoverConfigFromLastKnownGood(params: { if (!snapshot.exists || typeof snapshot.raw !== "string") { return false; } + if (!shouldAttemptLastKnownGoodRecovery(snapshot)) { + if (isPluginLocalInvalidConfigSnapshot(snapshot)) { + deps.logger.warn( + `Config last-known-good recovery skipped: invalidity is scoped to plugin entries (${params.reason})`, + ); + } + return false; + } const healthState = await readConfigHealthState(deps); const entry = getConfigHealthEntry(healthState, snapshot.path); const promoted = entry.lastPromotedGood; diff --git a/src/config/recovery-policy.ts b/src/config/recovery-policy.ts new file mode 100644 index 00000000000..862364549b9 --- /dev/null +++ b/src/config/recovery-policy.ts @@ -0,0 +1,35 @@ +import type { ConfigFileSnapshot, ConfigValidationIssue } from "./types.openclaw.js"; + +const PLUGIN_ENTRY_PATH_PREFIX = "plugins.entries."; + +function isPluginEntryIssue(issue: ConfigValidationIssue): boolean { + const path = issue.path.trim(); + if (!path.startsWith(PLUGIN_ENTRY_PATH_PREFIX)) { + return false; + } + return path.slice(PLUGIN_ENTRY_PATH_PREFIX.length).trim().length > 0; +} + +/** + * Returns true when an invalid config snapshot is scoped entirely to plugin entries. + */ +export function isPluginLocalInvalidConfigSnapshot( + snapshot: Pick, +): boolean { + if (snapshot.valid || snapshot.legacyIssues.length > 0 || snapshot.issues.length === 0) { + return false; + } + return snapshot.issues.every(isPluginEntryIssue); +} + +/** + * Decides whether whole-file last-known-good recovery is safe for a snapshot. + */ +export function shouldAttemptLastKnownGoodRecovery( + snapshot: Pick, +): boolean { + if (snapshot.valid) { + return false; + } + return !isPluginLocalInvalidConfigSnapshot(snapshot); +} diff --git a/src/gateway/config-reload.test.ts b/src/gateway/config-reload.test.ts index 54095601f0f..eea6b2c29b8 100644 --- a/src/gateway/config-reload.test.ts +++ b/src/gateway/config-reload.test.ts @@ -727,6 +727,62 @@ describe("startGatewayConfigReloader", () => { await reloader.stop(); }); + it("skips last-known-good recovery for plugin-local invalid reloads", async () => { + const activeConfig: OpenClawConfig = { + gateway: { reload: { debounceMs: 0 } }, + agents: { defaults: { model: "gpt-5.4" } }, + plugins: { + entries: { + "lossless-claw": { + enabled: true, + config: { compactionMode: "adaptive", cacheAwareCompaction: true }, + }, + }, + }, + }; + const invalidSnapshot = makeSnapshot({ + valid: false, + raw: `${JSON.stringify(activeConfig, null, 2)}\n`, + parsed: activeConfig, + sourceConfig: activeConfig, + runtimeConfig: activeConfig, + config: activeConfig, + issues: [ + { + path: "plugins.entries.lossless-claw.config.cacheAwareCompaction", + message: "invalid config: must NOT have additional properties", + }, + ], + hash: "plugin-skew-1", + }); + const readSnapshot = vi + .fn<() => Promise>() + .mockResolvedValueOnce(invalidSnapshot); + const recoverSnapshot = vi.fn(async () => true); + const promoteSnapshot = vi.fn(async () => true); + const { watcher, onHotReload, onRestart, log, reloader } = createReloaderHarness(readSnapshot, { + recoverSnapshot, + promoteSnapshot, + }); + + watcher.emit("change"); + await vi.runAllTimersAsync(); + + expect(recoverSnapshot).not.toHaveBeenCalled(); + expect(readSnapshot).toHaveBeenCalledTimes(1); + expect(onHotReload).not.toHaveBeenCalled(); + expect(onRestart).not.toHaveBeenCalled(); + expect(promoteSnapshot).not.toHaveBeenCalled(); + expect(log.warn).toHaveBeenCalledWith( + "config reload recovery skipped after invalid-config: invalidity is scoped to plugin entries", + ); + expect(log.warn).toHaveBeenCalledWith( + expect.stringContaining("config reload skipped (invalid config):"), + ); + + await reloader.stop(); + }); + it("promotes valid external config edits after they are accepted", async () => { const acceptedSnapshot = makeSnapshot({ config: { diff --git a/src/gateway/config-reload.ts b/src/gateway/config-reload.ts index 33c6295196e..585b9e7bc01 100644 --- a/src/gateway/config-reload.ts +++ b/src/gateway/config-reload.ts @@ -7,6 +7,7 @@ import type { ConfigWriteNotification, GatewayReloadMode, } from "../config/config.js"; +import { shouldAttemptLastKnownGoodRecovery } from "../config/config.js"; import { formatConfigIssueLines } from "../config/issue-format.js"; import { isPlainObject } from "../utils.js"; import { @@ -222,6 +223,12 @@ export function startGatewayConfigReloader(opts: { if (!opts.recoverSnapshot) { return null; } + if (!shouldAttemptLastKnownGoodRecovery(snapshot)) { + opts.log.warn( + `config reload recovery skipped after ${reason}: invalidity is scoped to plugin entries`, + ); + return null; + } const recovered = await opts.recoverSnapshot(snapshot, reason); if (!recovered) { return null; diff --git a/src/gateway/server-startup-config.recovery.test.ts b/src/gateway/server-startup-config.recovery.test.ts index 64ec9de3d92..8d0ea271a0f 100644 --- a/src/gateway/server-startup-config.recovery.test.ts +++ b/src/gateway/server-startup-config.recovery.test.ts @@ -8,6 +8,16 @@ vi.mock("../config/config.js", () => ({ readConfigFileSnapshot: vi.fn(), recoverConfigFromLastKnownGood: vi.fn(), recoverConfigFromJsonRootSuffix: vi.fn(), + shouldAttemptLastKnownGoodRecovery: vi.fn((snapshot: ConfigFileSnapshot) => { + if (snapshot.valid) { + return false; + } + return !( + snapshot.legacyIssues.length === 0 && + snapshot.issues.length > 0 && + snapshot.issues.every((issue) => issue.path.startsWith("plugins.entries.")) + ); + }), writeConfigFile: vi.fn(), })); @@ -110,6 +120,62 @@ describe("gateway startup config recovery", () => { expect(recoveryNotice.enqueueConfigRecoveryNotice).not.toHaveBeenCalled(); }); + it("does not restore last-known-good for plugin-local startup invalidity", async () => { + const invalidSnapshot = buildTestConfigSnapshot({ + path: configPath, + exists: true, + raw: `${JSON.stringify({ + gateway: { mode: "local" }, + plugins: { + entries: { + feishu: { enabled: true }, + }, + }, + })}\n`, + parsed: { + gateway: { mode: "local" }, + plugins: { + entries: { + feishu: { enabled: true }, + }, + }, + }, + valid: false, + config: { + gateway: { mode: "local" }, + plugins: { + entries: { + feishu: { enabled: true }, + }, + }, + } as OpenClawConfig, + issues: [ + { + path: "plugins.entries.feishu", + message: + "plugin feishu: plugin requires OpenClaw >=2026.4.23, but this host is 2026.4.22; skipping load", + }, + ], + legacyIssues: [], + }); + vi.mocked(configIo.readConfigFileSnapshot).mockResolvedValueOnce(invalidSnapshot); + const log = { info: vi.fn(), warn: vi.fn() }; + + await expect( + loadGatewayStartupConfigSnapshot({ + minimalTestGateway: true, + log, + }), + ).rejects.toThrow(`Invalid config at ${configPath}.`); + + expect(configIo.recoverConfigFromLastKnownGood).not.toHaveBeenCalled(); + expect(configIo.recoverConfigFromJsonRootSuffix).toHaveBeenCalledWith(invalidSnapshot); + expect(log.warn).toHaveBeenCalledWith( + `gateway: last-known-good recovery skipped for plugin-local config invalidity: ${configPath}`, + ); + expect(recoveryNotice.enqueueConfigRecoveryNotice).not.toHaveBeenCalled(); + }); + it("strips a valid JSON suffix when last-known-good recovery is unavailable", async () => { const invalidSnapshot = buildSnapshot({ valid: false, diff --git a/src/gateway/server-startup-config.ts b/src/gateway/server-startup-config.ts index 59b363e9399..3e7574e9090 100644 --- a/src/gateway/server-startup-config.ts +++ b/src/gateway/server-startup-config.ts @@ -9,6 +9,7 @@ import { readConfigFileSnapshot, recoverConfigFromLastKnownGood, recoverConfigFromJsonRootSuffix, + shouldAttemptLastKnownGoodRecovery, writeConfigFile, } from "../config/config.js"; import { formatConfigIssueLines } from "../config/issue-format.js"; @@ -70,10 +71,18 @@ export async function loadGatewayStartupConfigSnapshot(params: { } if (configSnapshot.exists) { if (!configSnapshot.valid) { - const recovered = await recoverConfigFromLastKnownGood({ - snapshot: configSnapshot, - reason: "startup-invalid-config", - }); + const canRecoverFromLastKnownGood = shouldAttemptLastKnownGoodRecovery(configSnapshot); + const recovered = canRecoverFromLastKnownGood + ? await recoverConfigFromLastKnownGood({ + snapshot: configSnapshot, + reason: "startup-invalid-config", + }) + : false; + if (!canRecoverFromLastKnownGood) { + params.log.warn( + `gateway: last-known-good recovery skipped for plugin-local config invalidity: ${configSnapshot.path}`, + ); + } if (recovered) { wroteConfig = true; params.log.warn(