diff --git a/CHANGELOG.md b/CHANGELOG.md index e18eaeb55d3..bd7112eb8a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ Docs: https://docs.openclaw.ai - Control UI/WebChat: let sidebar markdown code-block Copy buttons use the same delegated clipboard handler as chat messages. (#58709) Thanks @tikitoki. - Discord/streaming: only mark partial draft previews delivered after final edit or fallback delivery succeeds, so failed finalization cleanup removes stale truncated drafts instead of leaving them as the visible reply. Fixes #82035. Thanks @compoodment. - macOS/Gateway: surface leftover `ai.openclaw.update.*` launchd updater jobs in `openclaw gateway status --deep` and doctor so post-update launchd loops point at the stale job cleanup. Fixes #81859. Thanks @BKF-Gitty. +- macOS/screen snapshots: reject malformed `screen.snapshot` params before capture, bound base64 results against the projected `node.invoke.result` frame, and preserve stable caller-facing errors for oversized payloads and capture failures. Fixes #68181. Thanks @shaun0927 and @BunsDev. - Config/doctor: rotate capped `.clobbered.*` repair snapshots by artifact timestamp so repeated repairs keep the newest forensic copy instead of preserving only the first capped set. (#82012) Thanks @Kaspre. - Telegram: initialize the bot before isolated polling drains spooled updates so default isolated polling no longer retries every update with `Bot not initialized` and stalls replies. Fixes #81973. (#81975) Thanks @neeravmakwana. - Telegram: apply method-aware Bot API request timeouts to direct message/action clients so `openclaw message delete --channel telegram` no longer waits on grammY's 500-second default when the API request wedges. Fixes #81908. Thanks @DashLabsDev. diff --git a/apps/macos/Sources/OpenClaw/NodeMode/MacNodeRuntime.swift b/apps/macos/Sources/OpenClaw/NodeMode/MacNodeRuntime.swift index a955c7a1b86..eb18e95d244 100644 --- a/apps/macos/Sources/OpenClaw/NodeMode/MacNodeRuntime.swift +++ b/apps/macos/Sources/OpenClaw/NodeMode/MacNodeRuntime.swift @@ -4,6 +4,8 @@ import OpenClawIPC import OpenClawKit actor MacNodeRuntime { + private static let maxGatewayPayloadBytes = 25 * 1024 * 1024 + private static let maxScreenSnapshotRawBytesBeforeBase64 = (maxGatewayPayloadBytes / 4) * 3 private let cameraCapture = CameraCaptureService() private let makeMainActorServices: () async -> any MacNodeRuntimeMainActorServices private let browserProxyRequest: @Sendable (String?) async throws -> String @@ -363,15 +365,55 @@ actor MacNodeRuntime { } private func handleScreenSnapshotInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { - let params = (try? Self.decodeParams(MacNodeScreenSnapshotParams.self, from: req.paramsJSON)) ?? - MacNodeScreenSnapshotParams() + let params: MacNodeScreenSnapshotParams + if let paramsJSON = req.paramsJSON { + do { + params = try Self.decodeParams(MacNodeScreenSnapshotParams.self, from: paramsJSON) + } catch { + return Self.errorResponse( + req, + code: .invalidRequest, + message: "INVALID_REQUEST: invalid screen snapshot params") + } + } else { + params = MacNodeScreenSnapshotParams() + } let services = await self.mainActorServices() let capturedAtMs = Int64(Date().timeIntervalSince1970 * 1000) - let res = try await services.snapshotScreen( - screenIndex: params.screenIndex, - maxWidth: params.maxWidth, - quality: params.quality, - format: params.format) + let res: (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int) + do { + res = try await services.snapshotScreen( + screenIndex: params.screenIndex, + maxWidth: params.maxWidth, + quality: params.quality, + format: params.format) + } catch let error as ScreenSnapshotService.ScreenSnapshotError { + switch error { + case .noDisplays: + return Self.errorResponse( + req, + code: .invalidRequest, + message: "INVALID_REQUEST: no displays available for screen snapshot") + case let .invalidScreenIndex(idx): + return Self.errorResponse( + req, + code: .invalidRequest, + message: "INVALID_REQUEST: invalid screen index \(idx)") + case .captureFailed, .encodeFailed: + return Self.errorResponse( + req, + code: .unavailable, + message: "UNAVAILABLE: screen snapshot failed") + } + } catch { + return Self.errorResponse( + req, + code: .unavailable, + message: "UNAVAILABLE: screen snapshot failed") + } + if res.data.count > Self.maxScreenSnapshotRawBytesBeforeBase64 { + return Self.screenSnapshotPayloadTooLarge(req) + } struct ScreenSnapshotPayload: Encodable { var format: String var base64: String @@ -387,6 +429,13 @@ actor MacNodeRuntime { height: res.height, screenIndex: params.screenIndex, capturedAtMs: capturedAtMs)) + if try Self.projectedOuterFrameBytes( + forPayloadJSON: payload, + requestId: req.id, + nodeId: req.nodeId) > Self.maxGatewayPayloadBytes + { + return Self.screenSnapshotPayloadTooLarge(req) + } return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) } @@ -1004,6 +1053,40 @@ extension MacNodeRuntime { return json } + static func projectedOuterFrameBytes( + forPayloadJSON payloadJSON: String, + requestId: String, + nodeId: String?) throws -> Int + { + struct InvokeResultFrame: Encodable { + let type = "req" + let id = "00000000-0000-0000-0000-000000000000" + let method = "node.invoke.result" + let params: Params + + struct Params: Encodable { + let id: String + let nodeId: String + let ok: Bool + let payloadJSON: String + } + } + + let frame = InvokeResultFrame(params: InvokeResultFrame.Params( + id: requestId, + nodeId: nodeId ?? "", + ok: true, + payloadJSON: payloadJSON)) + return try JSONEncoder().encode(frame).count + } + + private static func screenSnapshotPayloadTooLarge(_ req: BridgeInvokeRequest) -> BridgeInvokeResponse { + self.errorResponse( + req, + code: .unavailable, + message: "UNAVAILABLE: screen snapshot payload too large; reduce maxWidth or use jpeg") + } + private nonisolated static func canvasEnabled() -> Bool { UserDefaults.standard.object(forKey: canvasEnabledKey) as? Bool ?? true } diff --git a/apps/macos/Sources/OpenClaw/ScreenSnapshotService.swift b/apps/macos/Sources/OpenClaw/ScreenSnapshotService.swift index 8a5b9f813e9..774c0da5717 100644 --- a/apps/macos/Sources/OpenClaw/ScreenSnapshotService.swift +++ b/apps/macos/Sources/OpenClaw/ScreenSnapshotService.swift @@ -63,7 +63,7 @@ final class ScreenSnapshotService { contentFilter: filter, configuration: config) } catch { - throw ScreenSnapshotError.captureFailed(error.localizedDescription) + throw ScreenSnapshotError.captureFailed("screen capture failed") } let bitmap = NSBitmapImageRep(cgImage: cgImage) diff --git a/apps/macos/Tests/OpenClawIPCTests/MacNodeRuntimeTests.swift b/apps/macos/Tests/OpenClawIPCTests/MacNodeRuntimeTests.swift index a69139f210e..55cb6a01e43 100644 --- a/apps/macos/Tests/OpenClawIPCTests/MacNodeRuntimeTests.swift +++ b/apps/macos/Tests/OpenClawIPCTests/MacNodeRuntimeTests.swift @@ -26,6 +26,78 @@ struct MacNodeRuntimeTests { } } + @MainActor + final class ScreenSnapshotProbeServices: MacNodeRuntimeMainActorServices, @unchecked Sendable { + typealias SnapshotResult = ( + data: Data, + format: OpenClawScreenSnapshotFormat, + width: Int, + height: Int) + + var snapshotCallCount = 0 + var receivedSnapshotParams: MacNodeScreenSnapshotParams? + var snapshotResult: SnapshotResult + var snapshotError: Error? + + init( + snapshotResult: SnapshotResult = (Data("ok".utf8), .jpeg, 10, 10), + snapshotError: Error? = nil) + { + self.snapshotResult = snapshotResult + self.snapshotError = snapshotError + } + + func snapshotScreen( + screenIndex: Int?, + maxWidth: Int?, + quality: Double?, + format: OpenClawScreenSnapshotFormat?) async throws -> SnapshotResult + { + self.snapshotCallCount += 1 + self.receivedSnapshotParams = MacNodeScreenSnapshotParams( + screenIndex: screenIndex, + maxWidth: maxWidth, + quality: quality, + format: format) + if let snapshotError { + throw snapshotError + } + return self.snapshotResult + } + + func recordScreen( + screenIndex: Int?, + durationMs: Int?, + fps: Double?, + includeAudio: Bool?, + outPath: String?) async throws -> (path: String, hasAudio: Bool) + { + let url = FileManager().temporaryDirectory + .appendingPathComponent("openclaw-test-screen-record-\(UUID().uuidString).mp4") + try Data("ok".utf8).write(to: url) + return (path: url.path, hasAudio: false) + } + + func locationAuthorizationStatus() -> CLAuthorizationStatus { + .authorizedAlways + } + + func locationAccuracyAuthorization() -> CLAccuracyAuthorization { + .fullAccuracy + } + + func currentLocation( + desiredAccuracy: OpenClawLocationAccuracy, + maxAgeMs: Int?, + timeoutMs: Int?) async throws -> CLLocation + { + _ = desiredAccuracy + _ = maxAgeMs + _ = timeoutMs + return CLLocation(latitude: 0, longitude: 0) + } + } + @Test func `handle invoke rejects unknown command`() async { let runtime = MacNodeRuntime() let response = await runtime.handleInvoke( @@ -298,6 +370,199 @@ struct MacNodeRuntimeTests { #expect(payload.capturedAtMs <= snapshotCalledAtMs!) } + @Test func `handle invoke screen snapshot rejects malformed params before capture`() async throws { + let services = await MainActor.run { ScreenSnapshotProbeServices() } + let runtime = MacNodeRuntime(makeMainActorServices: { services }) + + let response = await runtime.handleInvoke( + BridgeInvokeRequest( + id: "req-screen-snapshot-invalid", + command: MacNodeScreenCommand.snapshot.rawValue, + paramsJSON: #"{"screenIndex":"#)) + + #expect(response.ok == false) + #expect(response.error?.code == .invalidRequest) + #expect(response.error?.message == "INVALID_REQUEST: invalid screen snapshot params") + let snapshotCallCount = await MainActor.run { services.snapshotCallCount } + #expect(snapshotCallCount == 0) + } + + @Test func `handle invoke screen snapshot keeps nil params as defaults`() async throws { + let services = await MainActor.run { ScreenSnapshotProbeServices() } + let runtime = MacNodeRuntime(makeMainActorServices: { services }) + + let response = await runtime.handleInvoke( + BridgeInvokeRequest( + id: "req-screen-snapshot-defaults", + command: MacNodeScreenCommand.snapshot.rawValue)) + + #expect(response.ok == true) + let received = await MainActor.run { services.receivedSnapshotParams } + #expect(received == MacNodeScreenSnapshotParams()) + } + + @Test func `handle invoke screen snapshot sanitizes capture failures`() async throws { + struct SensitiveError: LocalizedError { + let detail: String + var errorDescription: String? { detail } + } + + let services = await MainActor.run { + ScreenSnapshotProbeServices(snapshotError: SensitiveError(detail: "TCC_DENIED display-id=ABC123")) + } + let runtime = MacNodeRuntime(makeMainActorServices: { services }) + + let response = await runtime.handleInvoke( + BridgeInvokeRequest( + id: "req-screen-snapshot-error", + command: MacNodeScreenCommand.snapshot.rawValue)) + + #expect(response.ok == false) + #expect(response.error?.code == .unavailable) + #expect(response.error?.message == "UNAVAILABLE: screen snapshot failed") + } + + @Test func `handle invoke screen snapshot reports validation failures as invalid request`() async throws { + let invalidIndexServices = await MainActor.run { + ScreenSnapshotProbeServices( + snapshotError: ScreenSnapshotService.ScreenSnapshotError.invalidScreenIndex(4)) + } + let invalidIndexRuntime = MacNodeRuntime(makeMainActorServices: { invalidIndexServices }) + let invalidIndexResponse = await invalidIndexRuntime.handleInvoke( + BridgeInvokeRequest( + id: "req-screen-snapshot-bad-index", + command: MacNodeScreenCommand.snapshot.rawValue)) + + #expect(invalidIndexResponse.ok == false) + #expect(invalidIndexResponse.error?.code == .invalidRequest) + #expect(invalidIndexResponse.error?.message == "INVALID_REQUEST: invalid screen index 4") + + let noDisplaysServices = await MainActor.run { + ScreenSnapshotProbeServices(snapshotError: ScreenSnapshotService.ScreenSnapshotError.noDisplays) + } + let noDisplaysRuntime = MacNodeRuntime(makeMainActorServices: { noDisplaysServices }) + let noDisplaysResponse = await noDisplaysRuntime.handleInvoke( + BridgeInvokeRequest( + id: "req-screen-snapshot-no-displays", + command: MacNodeScreenCommand.snapshot.rawValue)) + + #expect(noDisplaysResponse.ok == false) + #expect(noDisplaysResponse.error?.code == .invalidRequest) + #expect( + noDisplaysResponse.error?.message == + "INVALID_REQUEST: no displays available for screen snapshot") + } + + @Test func `handle invoke screen snapshot rejects raw payloads above base64 ceiling`() async throws { + let payloadSize = 19_660_801 + let services = await MainActor.run { + ScreenSnapshotProbeServices(snapshotResult: ( + Data(repeating: 0x41, count: payloadSize), + .jpeg, + 4000, + 3000)) + } + let runtime = MacNodeRuntime(makeMainActorServices: { services }) + + let response = await runtime.handleInvoke( + BridgeInvokeRequest( + id: "req-screen-snapshot-too-large", + command: MacNodeScreenCommand.snapshot.rawValue)) + + #expect(response.ok == false) + #expect(response.payloadJSON == nil) + #expect(response.error?.code == .unavailable) + #expect( + response.error?.message == + "UNAVAILABLE: screen snapshot payload too large; reduce maxWidth or use jpeg") + } + + @Test func `handle invoke screen snapshot rejects escaped oversized outer frames`() async throws { + let payloadSize = 12 * 1024 * 1024 + let services = await MainActor.run { + ScreenSnapshotProbeServices(snapshotResult: ( + Data(repeating: 0xFF, count: payloadSize), + .png, + 4000, + 3000)) + } + let runtime = MacNodeRuntime(makeMainActorServices: { services }) + + let response = await runtime.handleInvoke( + BridgeInvokeRequest( + id: "req-screen-snapshot-slash-heavy", + command: MacNodeScreenCommand.snapshot.rawValue, + nodeId: "node-slash-heavy")) + + #expect(response.ok == false) + #expect(response.error?.code == .unavailable) + #expect( + response.error?.message == + "UNAVAILABLE: screen snapshot payload too large; reduce maxWidth or use jpeg") + } + + @Test func `handle invoke screen snapshot accepts near-limit frames that fit`() async throws { + let payloadSize = 19_660_100 + let services = await MainActor.run { + ScreenSnapshotProbeServices(snapshotResult: ( + Data(repeating: 0x00, count: payloadSize), + .jpeg, + 4000, + 3000)) + } + let runtime = MacNodeRuntime(makeMainActorServices: { services }) + + let response = await runtime.handleInvoke( + BridgeInvokeRequest( + id: "req-fit", + command: MacNodeScreenCommand.snapshot.rawValue, + nodeId: "node-fit")) + + #expect(response.ok == true) + let payloadJSON = try #require(response.payloadJSON) + let projected = try MacNodeRuntime.projectedOuterFrameBytes( + forPayloadJSON: payloadJSON, + requestId: "req-fit", + nodeId: "node-fit") + #expect(projected < 25 * 1024 * 1024) + } + + @Test func `projected outer frame bytes accounts for dynamic node id escaping`() throws { + let inner = "{\"format\":\"png\",\"note\":\"\u{0001}\u{0002}\n\t\\\"raw\\\"\",\"width\":1,\"height\":1,\"capturedAtMs\":0}" + let projected = try MacNodeRuntime.projectedOuterFrameBytes( + forPayloadJSON: inner, + requestId: "req-control", + nodeId: "node-\u{0001}\u{0002}\u{0003}\n\t-id") + + struct Frame: Encodable { + let type = "req" + let id = "00000000-0000-0000-0000-000000000000" + let method = "node.invoke.result" + let params: Params + + struct Params: Encodable { + let id: String + let nodeId: String + let ok: Bool + let payloadJSON: String + } + } + let serialized = try JSONEncoder().encode(Frame(params: Frame.Params( + id: "req-control", + nodeId: "node-\u{0001}\u{0002}\u{0003}\n\t-id", + ok: true, + payloadJSON: inner))) + + #expect(projected == serialized.count) + + let controlHeavyNodeId = String(repeating: "\u{0001}", count: 5 * 1024 * 1024) + let controlHeavyProjection = try MacNodeRuntime.projectedOuterFrameBytes( + forPayloadJSON: "{}", + requestId: "req-control", + nodeId: controlHeavyNodeId) + #expect(controlHeavyProjection > 25 * 1024 * 1024) + } + @Test func `handle invoke browser proxy uses injected request`() async { let runtime = MacNodeRuntime(browserProxyRequest: { paramsJSON in #expect(paramsJSON?.contains("/tabs") == true) diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/BridgeFrames.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/BridgeFrames.swift index debcec3ae87..79d1a7369e6 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/BridgeFrames.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/BridgeFrames.swift @@ -13,12 +13,20 @@ public struct BridgeInvokeRequest: Codable, Sendable { public let id: String public let command: String public let paramsJSON: String? + public let nodeId: String? - public init(type: String = "invoke", id: String, command: String, paramsJSON: String? = nil) { + public init( + type: String = "invoke", + id: String, + command: String, + paramsJSON: String? = nil, + nodeId: String? = nil) + { self.type = type self.id = id self.command = command self.paramsJSON = paramsJSON + self.nodeId = nodeId } } diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayNodeSession.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayNodeSession.swift index 19730692846..18fd7904556 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayNodeSession.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayNodeSession.swift @@ -457,7 +457,8 @@ public actor GatewayNodeSession { let req = BridgeInvokeRequest( id: request.id, command: request.command, - paramsJSON: request.paramsJSON) + paramsJSON: request.paramsJSON, + nodeId: request.nodeId) self.logger.info("node invoke executing id=\(request.id, privacy: .public)") let response = await Self.invokeWithTimeout( request: req,