From 081060d017498aaecf1288d405841e8b7f88e1be Mon Sep 17 00:00:00 2001 From: Alexander Hill Date: Fri, 10 Apr 2026 13:17:17 -0500 Subject: [PATCH] matrix: detect repeated undecryptable events after startup (cherry picked from commit b4ad939021b83c28f03ffbc41f407754a1e3b8f8) (cherry picked from commit 42d134228c1ef9f35f937f1a49c3abf56c5b2210) --- .../matrix/src/matrix/monitor/events.test.ts | 115 ++++++++++++ .../matrix/src/matrix/monitor/events.ts | 170 +++++++++++++++++- extensions/matrix/src/matrix/monitor/index.ts | 2 + 3 files changed, 279 insertions(+), 8 deletions(-) diff --git a/extensions/matrix/src/matrix/monitor/events.test.ts b/extensions/matrix/src/matrix/monitor/events.test.ts index c3e2f843e63..5267b22780e 100644 --- a/extensions/matrix/src/matrix/monitor/events.test.ts +++ b/extensions/matrix/src/matrix/monitor/events.test.ts @@ -32,6 +32,8 @@ function createHarness(params?: { cryptoAvailable?: boolean; selfUserId?: string; selfUserIdError?: Error; + startupMs?: number; + startupGraceMs?: number; allowFrom?: string[]; dmEnabled?: boolean; dmPolicy?: "open" | "pairing" | "allowlist" | "disabled"; @@ -145,6 +147,8 @@ function createHarness(params?: { warnedEncryptedRooms: new Set(), warnedCryptoMissingRooms: new Set(), logger, + startupMs: params?.startupMs, + startupGraceMs: params?.startupGraceMs, formatNativeDependencyHint, onRoomMessage, }); @@ -1485,6 +1489,117 @@ describe("registerMatrixMonitorEvents verification routing", () => { ); }); + it("classifies repeated fresh post-startup decrypt failures separately", async () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date("2026-04-10T16:21:00.000Z")); + try { + const startupMs = Date.now() - 60_000; + const { logger, failedDecryptListener } = createHarness({ + accountId: "ops", + startupMs, + }); + if (!failedDecryptListener) { + throw new Error("room.failed_decryption listener was not registered"); + } + + for (const [index, roomId] of [ + "!room-a:example.org", + "!room-b:example.org", + "!room-c:example.org", + ].entries()) { + await failedDecryptListener( + roomId, + { + event_id: `$enc-fresh-${index + 1}`, + sender: `@alice${index + 1}:matrix.example.org`, + type: EventType.RoomMessageEncrypted, + origin_server_ts: Date.now() - 1_000 * (index + 1), + content: {}, + }, + new Error("The sender's device has not sent us the keys for this message."), + ); + } + + expect(logger.warn).toHaveBeenNthCalledWith( + 1, + "Failed to decrypt fresh post-startup message", + expect.objectContaining({ + eventId: "$enc-fresh-1", + freshPostStartup: true, + postStartupFailureCount: 1, + }), + ); + expect(logger.warn).toHaveBeenNthCalledWith( + 2, + "Failed to decrypt fresh post-startup message", + expect.objectContaining({ + eventId: "$enc-fresh-2", + freshPostStartup: true, + postStartupFailureCount: 2, + }), + ); + expect(logger.warn).toHaveBeenNthCalledWith( + 3, + "Failed to decrypt fresh post-startup message", + expect.objectContaining({ + eventId: "$enc-fresh-3", + freshPostStartup: true, + postStartupFailureCount: 3, + }), + ); + expect(logger.warn).toHaveBeenNthCalledWith( + 4, + "matrix: repeated fresh encrypted messages are still failing to decrypt after startup. Matrix sync is healthy, but this device may be missing new room keys. Check 'openclaw matrix verify status --verbose --account ops' and 'openclaw matrix devices list --account ops'.", + expect.objectContaining({ + failureCount: 3, + roomCount: 3, + senderCount: 3, + rooms: ["!room-a:example.org", "!room-b:example.org", "!room-c:example.org"], + sampleEventIds: ["$enc-fresh-1", "$enc-fresh-2", "$enc-fresh-3"], + }), + ); + } finally { + vi.useRealTimers(); + } + }); + + it("keeps pre-startup decrypt failures on the generic warning path", async () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date("2026-04-10T16:21:00.000Z")); + try { + const { logger, failedDecryptListener } = createHarness({ + accountId: "ops", + startupMs: Date.now(), + }); + if (!failedDecryptListener) { + throw new Error("room.failed_decryption listener was not registered"); + } + + await failedDecryptListener( + "!room:example.org", + { + event_id: "$enc-old", + sender: "@alice:matrix.example.org", + type: EventType.RoomMessageEncrypted, + origin_server_ts: Date.now() - 5 * 60_000, + content: {}, + }, + new Error("The sender's device has not sent us the keys for this message."), + ); + + expect(logger.warn).toHaveBeenCalledTimes(1); + expect(logger.warn).toHaveBeenCalledWith( + "Failed to decrypt message", + expect.objectContaining({ + eventId: "$enc-old", + freshPostStartup: false, + }), + ); + } finally { + vi.useRealTimers(); + } + }); + it("does not throw when getUserId fails during decrypt guidance lookup", async () => { const { logger, logVerboseMessage, failedDecryptListener } = createHarness({ accountId: "ops", diff --git a/extensions/matrix/src/matrix/monitor/events.ts b/extensions/matrix/src/matrix/monitor/events.ts index 9e5b531f5ba..571d9ea9948 100644 --- a/extensions/matrix/src/matrix/monitor/events.ts +++ b/extensions/matrix/src/matrix/monitor/events.ts @@ -8,6 +8,126 @@ import type { MatrixRawEvent } from "./types.js"; import { EventType } from "./types.js"; import { createMatrixVerificationEventRouter } from "./verification-events.js"; +const MATRIX_POST_STARTUP_DECRYPT_FAILURE_WINDOW_MS = 2 * 60_000; +const MATRIX_POST_STARTUP_DECRYPT_FAILURE_THRESHOLD = 3; +const MATRIX_POST_STARTUP_DECRYPT_FAILURE_SAMPLE_LIMIT = 3; + +type MatrixPostStartupDecryptFailureObservation = { + key: string; + roomId: string; + eventId: string; + sender: string | null; + eventTs: number; + error: string; +}; + +function formatMatrixPostStartupDecryptionHint(accountId: string): string { + return ( + "matrix: repeated fresh encrypted messages are still failing to decrypt after startup. " + + "Matrix sync is healthy, but this device may be missing new room keys. " + + `Check 'openclaw matrix verify status --verbose --account ${accountId}' and 'openclaw matrix devices list --account ${accountId}'.` + ); +} + +function isFreshPostStartupDecryptFailure(params: { + event: MatrixRawEvent; + startupMs?: number; + startupGraceMs?: number; + nowMs: number; +}): boolean { + const { event, startupMs, startupGraceMs = 0, nowMs } = params; + if (typeof startupMs !== "number" || !Number.isFinite(startupMs)) { + return false; + } + const eventTs = event.origin_server_ts; + if (!Number.isFinite(eventTs) || eventTs <= 0) { + return false; + } + if (eventTs < startupMs + startupGraceMs) { + return false; + } + if (eventTs > nowMs + 60_000) { + return false; + } + return true; +} + +function createMatrixPostStartupDecryptFailureTracker(params: { + startupMs?: number; + startupGraceMs?: number; +}) { + let observations: MatrixPostStartupDecryptFailureObservation[] = []; + let warningEmitted = false; + + const pruneObservations = (nowMs: number) => { + observations = observations.filter( + (entry) => nowMs - entry.eventTs <= MATRIX_POST_STARTUP_DECRYPT_FAILURE_WINDOW_MS, + ); + }; + + return { + recordFailure(roomId: string, event: MatrixRawEvent, error: Error) { + const nowMs = Date.now(); + if ( + !isFreshPostStartupDecryptFailure({ + event, + startupMs: params.startupMs, + startupGraceMs: params.startupGraceMs, + nowMs, + }) + ) { + return { freshPostStartup: false, failureCount: 0 } as const; + } + + pruneObservations(nowMs); + + const key = `${roomId}|${event.event_id}`; + if (!observations.some((entry) => entry.key === key)) { + observations.push({ + key, + roomId, + eventId: event.event_id, + sender: typeof event.sender === "string" ? event.sender : null, + eventTs: event.origin_server_ts, + error: error.message, + }); + } + + const failureCount = observations.length; + if (warningEmitted || failureCount < MATRIX_POST_STARTUP_DECRYPT_FAILURE_THRESHOLD) { + return { freshPostStartup: true, failureCount } as const; + } + + warningEmitted = true; + const rooms = [...new Set(observations.map((entry) => entry.roomId))].slice( + 0, + MATRIX_POST_STARTUP_DECRYPT_FAILURE_SAMPLE_LIMIT, + ); + const senders = [...new Set(observations.map((entry) => entry.sender).filter(Boolean))].slice( + 0, + MATRIX_POST_STARTUP_DECRYPT_FAILURE_SAMPLE_LIMIT, + ); + const eventIds = observations + .slice(-MATRIX_POST_STARTUP_DECRYPT_FAILURE_SAMPLE_LIMIT) + .map((entry) => entry.eventId); + const latestError = observations.at(-1)?.error ?? error.message; + return { + freshPostStartup: true, + failureCount, + warning: { + rooms, + roomCount: new Set(observations.map((entry) => entry.roomId)).size, + senders, + senderCount: new Set(observations.map((entry) => entry.sender).filter(Boolean)).size, + eventIds, + latestError, + windowMs: MATRIX_POST_STARTUP_DECRYPT_FAILURE_WINDOW_MS, + }, + } as const; + }, + }; +} + function formatMatrixSelfDecryptionHint(accountId: string): string { return ( "matrix: failed to decrypt a message from this same Matrix user. " + @@ -47,6 +167,8 @@ export function registerMatrixMonitorEvents(params: { warnedEncryptedRooms: Set; warnedCryptoMissingRooms: Set; logger: RuntimeLogger; + startupMs?: number; + startupGraceMs?: number; formatNativeDependencyHint: PluginRuntime["system"]["formatNativeDependencyHint"]; onRoomMessage: (roomId: string, event: MatrixRawEvent) => void | Promise; runDetachedTask?: (label: string, task: () => Promise) => Promise; @@ -64,10 +186,16 @@ export function registerMatrixMonitorEvents(params: { warnedEncryptedRooms, warnedCryptoMissingRooms, logger, + startupMs, + startupGraceMs, formatNativeDependencyHint, onRoomMessage, runDetachedTask, } = params; + const postStartupDecryptFailureTracker = createMatrixPostStartupDecryptFailureTracker({ + startupMs, + startupGraceMs, + }); const { routeVerificationEvent, routeVerificationSummary } = createMatrixVerificationEventRouter({ client, allowFrom, @@ -115,16 +243,42 @@ export function registerMatrixMonitorEvents(params: { client.on( "room.failed_decryption", async (roomId: string, event: MatrixRawEvent, error: Error) => { + const failureState = postStartupDecryptFailureTracker.recordFailure(roomId, event, error); const selfUserId = await resolveMatrixSelfUserId(client, logVerboseMessage); const sender = typeof event.sender === "string" ? event.sender : null; const senderMatchesOwnUser = Boolean(selfUserId && sender && selfUserId === sender); - logger.warn("Failed to decrypt message", { - roomId, - eventId: event.event_id, - sender, - senderMatchesOwnUser, - error: error.message, - }); + logger.warn( + failureState.freshPostStartup + ? "Failed to decrypt fresh post-startup message" + : "Failed to decrypt message", + { + roomId, + eventId: event.event_id, + sender, + senderMatchesOwnUser, + error: error.message, + freshPostStartup: failureState.freshPostStartup, + ...(failureState.freshPostStartup + ? { + postStartupFailureCount: failureState.failureCount, + } + : {}), + }, + ); + if (failureState.warning) { + logger.warn(formatMatrixPostStartupDecryptionHint(auth.accountId), { + roomId, + eventId: event.event_id, + failureCount: failureState.failureCount, + roomCount: failureState.warning.roomCount, + rooms: failureState.warning.rooms, + senderCount: failureState.warning.senderCount, + senders: failureState.warning.senders, + sampleEventIds: failureState.warning.eventIds, + latestError: failureState.warning.latestError, + windowMs: failureState.warning.windowMs, + }); + } if (senderMatchesOwnUser) { logger.warn(formatMatrixSelfDecryptionHint(auth.accountId), { roomId, @@ -133,7 +287,7 @@ export function registerMatrixMonitorEvents(params: { }); } logVerboseMessage( - `matrix: failed decrypt room=${roomId} id=${event.event_id ?? "unknown"} error=${error.message}`, + `matrix: failed decrypt room=${roomId} id=${event.event_id ?? "unknown"} freshPostStartup=${String(failureState.freshPostStartup)} error=${error.message}`, ); }, ); diff --git a/extensions/matrix/src/matrix/monitor/index.ts b/extensions/matrix/src/matrix/monitor/index.ts index 3de022a4b0e..7a951d4fbcd 100644 --- a/extensions/matrix/src/matrix/monitor/index.ts +++ b/extensions/matrix/src/matrix/monitor/index.ts @@ -358,6 +358,8 @@ export async function monitorMatrixProvider(opts: MonitorMatrixOpts = {}): Promi warnedEncryptedRooms, warnedCryptoMissingRooms, logger, + startupMs, + startupGraceMs, formatNativeDependencyHint: core.system.formatNativeDependencyHint, onRoomMessage: handleRoomMessage, runDetachedTask: monitorTaskRunner.runDetachedTask,