fix: handle ENOSPC file watcher errors gracefully (#73357)

Merged via squash.

Prepared head SHA: ce2dd6ed3e
Co-authored-by: solodmd <51304754+solodmd@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
solodmd
2026-05-03 16:42:38 +08:00
committed by GitHub
parent 4781b46056
commit d1365fef16
5 changed files with 230 additions and 6 deletions

View File

@@ -114,6 +114,7 @@ Docs: https://docs.openclaw.ai
- Parallels/Windows update smoke: escape the stale post-swap import regex in the generated PowerShell script so expected `ERR_MODULE_NOT_FOUND` update handoffs continue to post-update health checks. (#75315)
- Slack: allow draft preview streaming in top-level DMs when `replyToMode` is `off` while keeping Slack native streaming and assistant thread status gated on reply threads. Fixes #56480. (#56544) Thanks @HangGlidersRule.
- Control UI/chat: remove the delete-confirm popover outside-click listener on every dismiss path, so Cancel, Delete, outside clicks, and same-button toggles no longer leave stale document listeners behind. Refs #75590 and #69982. Thanks @Ricardo-M-L.
- Memory-core: treat exhausted file watcher limits as non-fatal for builtin memory auto-sync while preserving fatal handling for unrelated disk-full errors. (#73357) Thanks @solodmd.
## 2026.5.2

View File

@@ -446,6 +446,12 @@ export abstract class MemoryManagerSyncOps {
this.watcher.on("change", markDirty);
this.watcher.on("unlink", markDirty);
this.watcher.on("unlinkDir", markDirty);
this.watcher.on("error", (err) => {
// File watcher errors (e.g., ENOSPC) should not crash the gateway.
// Log the error and continue - memory search still works without auto-sync.
const message = err instanceof Error ? err.message : String(err);
log.warn(`memory watcher error: ${message}`);
});
}
protected ensureSessionListener() {

View File

@@ -9,9 +9,9 @@ import { afterAll, afterEach, beforeEach, describe, expect, it, vi } from "vites
type WatchIgnoredFn = (watchPath: string, stats?: { isDirectory?: () => boolean }) => boolean;
const { createdWatchers, watchMock } = vi.hoisted(() => {
type WatchEvent = "add" | "change" | "unlink" | "unlinkDir";
type WatchCallback = () => void;
const { createdWatchers, memoryLoggerWarn, watchMock } = vi.hoisted(() => {
type WatchEvent = "add" | "change" | "unlink" | "unlinkDir" | "error";
type WatchCallback = (value?: unknown) => void;
function createMockWatcher() {
const handlers = new Map<WatchEvent, WatchCallback[]>();
const watcher = {
@@ -20,9 +20,9 @@ const { createdWatchers, watchMock } = vi.hoisted(() => {
return watcher;
}),
close: vi.fn(async () => undefined),
emit: (event: WatchEvent) => {
emit: (event: WatchEvent, value?: unknown) => {
for (const callback of handlers.get(event) ?? []) {
callback();
callback(value);
}
},
};
@@ -31,6 +31,7 @@ const { createdWatchers, watchMock } = vi.hoisted(() => {
const watchers: Array<ReturnType<typeof createMockWatcher>> = [];
const result = {
createdWatchers: watchers,
memoryLoggerWarn: vi.fn(),
watchMock: vi.fn(() => {
const watcher = createMockWatcher();
watchers.push(watcher);
@@ -42,6 +43,18 @@ const { createdWatchers, watchMock } = vi.hoisted(() => {
return result;
});
// Partially mock the host-engine foundation module: everything is passed through
// unchanged except that each subsystem logger's `warn` is replaced by the hoisted
// `memoryLoggerWarn` spy so tests can assert on warning output.
vi.mock("openclaw/plugin-sdk/memory-core-host-engine-foundation", async (importOriginal) => {
  type FoundationModule =
    typeof import("openclaw/plugin-sdk/memory-core-host-engine-foundation");
  const actual = await importOriginal<FoundationModule>();

  const createSubsystemLogger = (subsystem: string) => {
    const realLogger = actual.createSubsystemLogger(subsystem);
    return { ...realLogger, warn: memoryLoggerWarn };
  };

  return { ...actual, createSubsystemLogger };
});
vi.mock("./sqlite-vec.js", () => ({
loadSqliteVecExtension: async () => ({ ok: false, error: "sqlite-vec disabled in tests" }),
}));
@@ -246,4 +259,16 @@ describe("memory watcher config", () => {
expect(syncSpy).toHaveBeenCalledWith({ reason: "watch" });
},
);
// The manager must register an "error" listener on the watcher, and that listener
// must log a warning instead of throwing when the watcher reports a failure.
it("attaches a logging non-throwing watcher error listener", async () => {
  await setupWatcherWorkspace({ name: "notes.md", contents: "hello" });
  await expectWatcherManager(createWatcherConfig());

  const [firstWatcher] = createdWatchers;
  expect(firstWatcher?.on).toHaveBeenCalledWith("error", expect.any(Function));

  // Emitting through the mock invokes the registered listener synchronously.
  const emitWatcherError = () => firstWatcher?.emit("error", new Error("watcher error: ENOSPC"));
  expect(emitWatcherError).not.toThrow();

  expect(memoryLoggerWarn).toHaveBeenCalledWith("memory watcher error: watcher error: ENOSPC");
});
});

View File

@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import {
isAbortError,
isBenignUncaughtExceptionError,
isTransientFileWatchError,
isTransientNetworkError,
isTransientSqliteError,
isTransientUnhandledRejectionError,
@@ -258,6 +259,104 @@ describe("isTransientSqliteError", () => {
});
});
describe("isTransientFileWatchError", () => {
  // Builds an Error that also carries a Node-style `code` property.
  const withCode = (message: string, code: string) => Object.assign(new Error(message), { code });
  // Shorthand for the ENOSPC-coded errors most cases below rely on.
  const enospc = (message: string) => withCode(message, "ENOSPC");

  it("returns true for ENOSPC with inotify message", () => {
    expect(isTransientFileWatchError(enospc("inotify watches exhausted"))).toBe(true);
  });

  it("returns true for ENOSPC with file watcher message", () => {
    const limitReached = enospc("System limit for number of file watchers reached");
    expect(isTransientFileWatchError(limitReached)).toBe(true);
  });

  it("returns true for ENOSPC with watcher error message", () => {
    expect(isTransientFileWatchError(enospc("watcher error: ENOSPC"))).toBe(true);
  });

  it("returns false for ENOSPC without watch indicator (general disk full)", () => {
    const diskFull = enospc("write failed: no space left on device");
    expect(isTransientFileWatchError(diskFull)).toBe(false);
  });

  it("returns false for ENOSPC with only 'disk full' message", () => {
    expect(isTransientFileWatchError(enospc("ENOSPC: disk full"))).toBe(false);
  });

  it("returns false for message-only disk full without watch indicator", () => {
    const diskFullMessages = [
      "write failed: no space left on device",
      "ENOSPC: no space left on device",
    ];
    for (const message of diskFullMessages) {
      expect(isTransientFileWatchError(new Error(message))).toBe(false);
    }
  });

  it("returns true for 'no space left on device' message with watcher context", () => {
    const watcherNoSpace = new Error("file watcher: no space left on device");
    expect(isTransientFileWatchError(watcherNoSpace)).toBe(true);
  });

  it("returns true for inotify-related error messages", () => {
    const exhaustionMessages = [
      "inotify watches exhausted",
      "System limit for number of file watchers reached",
    ];
    for (const message of exhaustionMessages) {
      expect(isTransientFileWatchError(new Error(message))).toBe(true);
    }
  });

  // NOTE(review): asserts the same input/expectation as the
  // "'no space left on device' message with watcher context" case above.
  it("returns true for watcher-related no-space messages", () => {
    const watcherNoSpace = new Error("file watcher: no space left on device");
    expect(isTransientFileWatchError(watcherNoSpace)).toBe(true);
  });

  it("returns false for generic code-less watcher messages", () => {
    // Without an error code, a bare "watcher" label is not enough to classify.
    const genericMessages = ["file watcher failed", "watcher error: boom", "watcher error: ENOSPC"];
    for (const message of genericMessages) {
      expect(isTransientFileWatchError(new Error(message))).toBe(false);
    }
    for (const message of genericMessages) {
      expect(isTransientUnhandledRejectionError(new Error(message))).toBe(false);
    }
  });

  it("returns true for ENOSPC with cause chain containing watch indicator", () => {
    const cause = enospc("inotify watches exhausted");
    const wrapped = Object.assign(new Error("watcher failed"), { cause });
    expect(isTransientFileWatchError(wrapped)).toBe(true);
  });

  it("returns false for 'watchdog timeout' (unrelated watch error)", () => {
    for (const message of ["watchdog timeout", "cannot watch process"]) {
      expect(isTransientFileWatchError(new Error(message))).toBe(false);
    }
  });

  it("returns false for regular errors without file watch indicators", () => {
    const unrelated = [
      new Error("Something went wrong"),
      new TypeError("Cannot read property"),
      new RangeError("Invalid array length"),
    ];
    for (const error of unrelated) {
      expect(isTransientFileWatchError(error)).toBe(false);
    }
  });

  it("returns false for other disk errors without ENOSPC", () => {
    expect(isTransientFileWatchError(new Error("disk quota exceeded"))).toBe(false);
    const readOnlyFs = withCode("read only file system", "EROFS");
    expect(isTransientFileWatchError(readOnlyFs)).toBe(false);
  });

  it.each([null, undefined, "string error", 42, { message: "plain object" }])(
    "returns false for non-file-watch input %#",
    (value) => {
      expect(isTransientFileWatchError(value)).toBe(false);
    },
  );
});
describe("isTransientUnhandledRejectionError", () => {
it("treats raw pre-connect network uncaught exceptions as benign", () => {
const epipe = Object.assign(new Error("write EPIPE"), { code: "EPIPE" });
@@ -287,4 +386,30 @@ describe("isTransientUnhandledRejectionError", () => {
expect(isTransientUnhandledRejectionError(error)).toBe(true);
});
// File watcher exhaustion is classified as transient; plain disk-full is not.
it("returns true for transient file watcher errors (ENOSPC + inotify)", () => {
  const exhausted = Object.assign(new Error("inotify watches exhausted"), { code: "ENOSPC" });
  const classified = isTransientUnhandledRejectionError(exhausted);
  expect(classified).toBe(true);
});

it("returns true for file watcher errors with message only", () => {
  const classified = isTransientUnhandledRejectionError(
    new Error("System limit for number of file watchers reached"),
  );
  expect(classified).toBe(true);
});

it("returns false for ENOSPC without watch indicator (general disk full)", () => {
  const diskFull = Object.assign(new Error("write failed: no space left on device"), {
    code: "ENOSPC",
  });
  const classified = isTransientUnhandledRejectionError(diskFull);
  expect(classified).toBe(false);
});

it("returns false for code-less disk full messages without watch indicator", () => {
  const diskFullMessages = [
    "write failed: no space left on device",
    "ENOSPC: no space left on device",
  ];
  for (const message of diskFullMessages) {
    expect(isTransientUnhandledRejectionError(new Error(message))).toBe(false);
  }
});
});

View File

@@ -350,8 +350,75 @@ export function isTransientSqliteError(err: unknown): boolean {
return false;
}
/**
 * Reports whether `err`, or any nested candidate yielded by
 * `collectNestedUnhandledErrorCandidates`, looks like a file watcher
 * resource-exhaustion failure (e.g. inotify watches exhausted) rather than an
 * unrelated storage failure.
 *
 * Classification rules, applied per candidate:
 * - Code/errno `ENOSPC`: transient only if the lowercased message also carries a
 *   watcher hint ("inotify", "watcher", "watch limit", "max watches"). ENOSPC is a
 *   general POSIX code (disk full, failed writes), so the code alone never matches.
 * - Any other or missing code: transient only if the message names watcher
 *   exhaustion explicitly, or pairs "no space left on device" with a watcher hint.
 *   Generic labels such as "file watcher failed" are not matched.
 *
 * @param err - Value of unknown shape; falsy values are never classified.
 * @returns `true` when at least one candidate matches the rules above.
 */
export function isTransientFileWatchError(err: unknown): boolean {
  if (!err) {
    return false;
  }

  // Hints that a message is about file watching at all.
  const watchHints: readonly string[] = [
    "inotify",
    "watcher",
    "file watcher",
    "watch limit",
    "max watches",
  ];
  // Phrases that name watcher resource exhaustion explicitly.
  const exhaustionHints: readonly string[] = [
    "inotify watches",
    "inotify watch",
    "system limit for number of file watchers",
    "watch limit",
    "max watches",
  ];
  const mentionsAny = (text: string, needles: readonly string[]): boolean =>
    needles.some((needle) => text.includes(needle));

  for (const candidate of collectNestedUnhandledErrorCandidates(err)) {
    // Only object candidates can carry a code or message.
    if (!candidate || typeof candidate !== "object") {
      continue;
    }

    const code = extractErrorCodeOrErrno(candidate);
    const text = normalizeLowercaseStringOrEmpty(
      "message" in candidate && typeof candidate.message === "string" ? candidate.message : "",
    );

    if (code === "ENOSPC") {
      // The code alone is ambiguous; require a watcher hint alongside it.
      if (mentionsAny(text, watchHints)) {
        return true;
      }
      // Plain ENOSPC (e.g. disk full) is left for other classifiers.
      continue;
    }

    if (!text) {
      continue;
    }

    // No ENOSPC code: accept only explicit watcher exhaustion wording.
    const isWatcherNoSpace =
      text.includes("no space left on device") && mentionsAny(text, watchHints);
    if (isWatcherNoSpace || mentionsAny(text, exhaustionHints)) {
      return true;
    }
  }

  return false;
}
/**
 * Reports whether `err` is classified as transient by any of the network, SQLite,
 * or file watcher classifiers.
 *
 * The file watcher check must be part of the single returned expression: an
 * earlier `return` covering only network and SQLite would leave it unreachable.
 *
 * @param err - Value of unknown shape to classify.
 * @returns `true` if at least one classifier accepts `err`.
 */
export function isTransientUnhandledRejectionError(err: unknown): boolean {
  return (
    isTransientNetworkError(err) || isTransientSqliteError(err) || isTransientFileWatchError(err)
  );
}
function isBenignUncaughtNetworkException(err: unknown): boolean {