diff --git a/extensions/memory-core/src/memory/manager-sync-ops.ts b/extensions/memory-core/src/memory/manager-sync-ops.ts
index f32d52aba3d..ca0881f4713 100644
--- a/extensions/memory-core/src/memory/manager-sync-ops.ts
+++ b/extensions/memory-core/src/memory/manager-sync-ops.ts
@@ -446,6 +446,12 @@ export abstract class MemoryManagerSyncOps {
     this.watcher.on("change", markDirty);
     this.watcher.on("unlink", markDirty);
     this.watcher.on("unlinkDir", markDirty);
+    this.watcher.on("error", (err) => {
+      // File watcher errors (e.g., ENOSPC) should not crash the gateway.
+      // Log the error and continue - memory search still works without auto-sync.
+      const message = err instanceof Error ? err.message : String(err);
+      log.warn(`memory watcher error: ${message}`);
+    });
   }
 
   protected ensureSessionListener() {
diff --git a/src/infra/unhandled-rejections.test.ts b/src/infra/unhandled-rejections.test.ts
index c3731d89b5b..fbe5bcc02db 100644
--- a/src/infra/unhandled-rejections.test.ts
+++ b/src/infra/unhandled-rejections.test.ts
@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
 import {
   isAbortError,
   isBenignUncaughtExceptionError,
+  isTransientFileWatchError,
   isTransientNetworkError,
   isTransientSqliteError,
   isTransientUnhandledRejectionError,
@@ -258,6 +259,93 @@ describe("isTransientSqliteError", () => {
   });
 });
 
+describe("isTransientFileWatchError", () => {
+  it("returns true for ENOSPC with inotify message", () => {
+    const error = Object.assign(new Error("inotify watches exhausted"), { code: "ENOSPC" });
+    expect(isTransientFileWatchError(error)).toBe(true);
+  });
+
+  it("returns true for ENOSPC with file watcher message", () => {
+    const error = Object.assign(new Error("System limit for number of file watchers reached"), {
+      code: "ENOSPC",
+    });
+    expect(isTransientFileWatchError(error)).toBe(true);
+  });
+
+  it("returns true for ENOSPC with watcher error message", () => {
+    const error = Object.assign(new Error("watcher error: ENOSPC"), { code: "ENOSPC" });
+    expect(isTransientFileWatchError(error)).toBe(true);
+  });
+
+  it("returns false for ENOSPC without watch indicator (general disk full)", () => {
+    const error = Object.assign(new Error("write failed: no space left on device"), {
+      code: "ENOSPC",
+    });
+    expect(isTransientFileWatchError(error)).toBe(false);
+  });
+
+  it("returns false for ENOSPC with only 'disk full' message", () => {
+    const error = Object.assign(new Error("ENOSPC: disk full"), { code: "ENOSPC" });
+    expect(isTransientFileWatchError(error)).toBe(false);
+  });
+
+  it("returns false for disk-full messages that carry no code or watch context", () => {
+    const diskFull = new Error("ENOSPC: no space left on device, write");
+    expect(isTransientFileWatchError(diskFull)).toBe(false);
+    expect(isTransientFileWatchError(new Error("no space left on device"))).toBe(false);
+  });
+
+  it("returns true for 'no space left on device' message with watcher context", () => {
+    const error = new Error("file watcher: no space left on device");
+    expect(isTransientFileWatchError(error)).toBe(true);
+  });
+
+  it("returns true for inotify-related error messages", () => {
+    expect(isTransientFileWatchError(new Error("inotify watches exhausted"))).toBe(true);
+    expect(
+      isTransientFileWatchError(new Error("System limit for number of file watchers reached")),
+    ).toBe(true);
+  });
+
+  it("returns true for watcher-related error messages", () => {
+    expect(isTransientFileWatchError(new Error("watcher error: ENOSPC"))).toBe(true);
+    expect(isTransientFileWatchError(new Error("file watcher failed"))).toBe(true);
+  });
+
+  it("returns true for ENOSPC with cause chain containing watch indicator", () => {
+    const cause = Object.assign(new Error("inotify watches exhausted"), { code: "ENOSPC" });
+    const error = Object.assign(new Error("watcher failed"), { cause });
+    expect(isTransientFileWatchError(error)).toBe(true);
+  });
+
+  it("returns false for 'watchdog timeout' (unrelated watch error)", () => {
+    expect(isTransientFileWatchError(new Error("watchdog timeout"))).toBe(false);
+    expect(isTransientFileWatchError(new Error("cannot watch process"))).toBe(false);
+  });
+
+  it("returns false for regular errors without file watch indicators", () => {
+    expect(isTransientFileWatchError(new Error("Something went wrong"))).toBe(false);
+    expect(isTransientFileWatchError(new TypeError("Cannot read property"))).toBe(false);
+    expect(isTransientFileWatchError(new RangeError("Invalid array length"))).toBe(false);
+  });
+
+  it("returns false for other disk errors without ENOSPC", () => {
+    expect(isTransientFileWatchError(new Error("disk quota exceeded"))).toBe(false);
+    expect(
+      isTransientFileWatchError(
+        Object.assign(new Error("read only file system"), { code: "EROFS" }),
+      ),
+    ).toBe(false);
+  });
+
+  it.each([null, undefined, "string error", 42, { message: "plain object" }])(
+    "returns false for non-file-watch input %#",
+    (value) => {
+      expect(isTransientFileWatchError(value)).toBe(false);
+    },
+  );
+});
+
 describe("isTransientUnhandledRejectionError", () => {
   it("treats raw pre-connect network uncaught exceptions as benign", () => {
     const epipe = Object.assign(new Error("write EPIPE"), { code: "EPIPE" });
@@ -287,4 +375,21 @@ describe("isTransientUnhandledRejectionError", () => {
 
     expect(isTransientUnhandledRejectionError(error)).toBe(true);
   });
+
+  it("returns true for transient file watcher errors (ENOSPC + inotify)", () => {
+    const error = Object.assign(new Error("inotify watches exhausted"), { code: "ENOSPC" });
+    expect(isTransientUnhandledRejectionError(error)).toBe(true);
+  });
+
+  it("returns true for file watcher errors with message only", () => {
+    const error = new Error("System limit for number of file watchers reached");
+    expect(isTransientUnhandledRejectionError(error)).toBe(true);
+  });
+
+  it("returns false for ENOSPC without watch indicator (general disk full)", () => {
+    const error = Object.assign(new Error("write failed: no space left on device"), {
+      code: "ENOSPC",
+    });
+    expect(isTransientUnhandledRejectionError(error)).toBe(false);
+  });
 });
diff --git a/src/infra/unhandled-rejections.ts b/src/infra/unhandled-rejections.ts
index 81f57a05af3..ec1e0800bc5 100644
--- a/src/infra/unhandled-rejections.ts
+++ b/src/infra/unhandled-rejections.ts
@@ -350,8 +350,73 @@ export function isTransientSqliteError(err: unknown): boolean {
   return false;
 }
 
+/** Message fragments that mark an ENOSPC-coded error as file watcher exhaustion. */
+const WATCH_MESSAGE_INDICATORS_WITH_ENOSPC: readonly string[] = [
+  "inotify",
+  "watcher",
+  "watch limit",
+  "max watches",
+];
+
+/**
+ * Message fragments specific enough to identify a file watcher failure without an ENOSPC
+ * code. Generic disk-full text ("no space left on device", "enospc") is deliberately absent.
+ */
+const WATCH_MESSAGE_INDICATORS_WITHOUT_ENOSPC: readonly string[] = [
+  "inotify watches",
+  "file watcher",
+  "watcher error",
+];
+
+/**
+ * Checks if an error is a transient file watcher error that shouldn't crash the gateway.
+ * These are typically resource exhaustion issues (e.g., inotify watches exhausted) that
+ * can be recovered from by degrading to manual sync mode.
+ *
+ * Note: ENOSPC is a general POSIX error code (disk full, write failures, etc.), and its
+ * "no space left on device" text also accompanies ordinary disk-full failures. To avoid
+ * misclassifying unrelated storage failures, every match requires a watch/inotify-related
+ * message indicator, similar to how hasSqliteSignal gates SQLite errors. An ENOSPC code
+ * accepts any watch indicator; without it only unambiguous file watcher phrases match.
+ */
+export function isTransientFileWatchError(err: unknown): boolean {
+  if (!err) {
+    return false;
+  }
+
+  for (const candidate of collectNestedUnhandledErrorCandidates(err)) {
+    // Skip non-object candidates early
+    if (!candidate || typeof candidate !== "object") {
+      continue;
+    }
+
+    const code = extractErrorCodeOrErrno(candidate);
+    const rawMessage =
+      "message" in candidate && typeof candidate.message === "string" ? candidate.message : "";
+    const message = normalizeLowercaseStringOrEmpty(rawMessage);
+    if (!message) {
+      // Every classification below needs a message indicator, with or without a code.
+      continue;
+    }
+
+    // ENOSPC alone also means "disk full", so the code only widens which watch indicators
+    // are accepted; it never classifies an error by itself.
+    const indicators =
+      code === "ENOSPC"
+        ? WATCH_MESSAGE_INDICATORS_WITH_ENOSPC
+        : WATCH_MESSAGE_INDICATORS_WITHOUT_ENOSPC;
+    if (indicators.some((indicator) => message.includes(indicator))) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
 export function isTransientUnhandledRejectionError(err: unknown): boolean {
-  return isTransientNetworkError(err) || isTransientSqliteError(err);
+  return (
+    isTransientNetworkError(err) || isTransientSqliteError(err) || isTransientFileWatchError(err)
+  );
 }
 
 function isBenignUncaughtNetworkException(err: unknown): boolean {