fix(macos): harden screen.snapshot validation and payload bounds

Fixes #68181.

Rejects malformed macOS screen.snapshot params before capture, sanitizes capture failures, and bounds inline base64 snapshot responses against the projected node.invoke.result frame size.

Supersedes #68186.
This commit is contained in:
Val Alexander
2026-05-15 02:27:33 -05:00
committed by GitHub
parent 66ba611f5b
commit 5f89cabeb5
6 changed files with 368 additions and 10 deletions

View File

@@ -39,6 +39,7 @@ Docs: https://docs.openclaw.ai
- Control UI/WebChat: let sidebar markdown code-block Copy buttons use the same delegated clipboard handler as chat messages. (#58709) Thanks @tikitoki.
- Discord/streaming: only mark partial draft previews delivered after final edit or fallback delivery succeeds, so failed finalization cleanup removes stale truncated drafts instead of leaving them as the visible reply. Fixes #82035. Thanks @compoodment.
- macOS/Gateway: surface leftover `ai.openclaw.update.*` launchd updater jobs in `openclaw gateway status --deep` and doctor so post-update launchd loops point at the stale job cleanup. Fixes #81859. Thanks @BKF-Gitty.
- macOS/screen snapshots: reject malformed `screen.snapshot` params before capture, bound base64 results against the projected `node.invoke.result` frame, and preserve stable caller-facing errors for oversized payloads and capture failures. Fixes #68181. Thanks @shaun0927 and @BunsDev.
- Config/doctor: rotate capped `.clobbered.*` repair snapshots by artifact timestamp so repeated repairs keep the newest forensic copy instead of preserving only the first capped set. (#82012) Thanks @Kaspre.
- Telegram: initialize the bot before isolated polling drains spooled updates so default isolated polling no longer retries every update with `Bot not initialized` and stalls replies. Fixes #81973. (#81975) Thanks @neeravmakwana.
- Telegram: apply method-aware Bot API request timeouts to direct message/action clients so `openclaw message delete --channel telegram` no longer waits on grammY's 500-second default when the API request wedges. Fixes #81908. Thanks @DashLabsDev.

View File

@@ -4,6 +4,8 @@ import OpenClawIPC
import OpenClawKit
actor MacNodeRuntime {
private static let maxGatewayPayloadBytes = 25 * 1024 * 1024
private static let maxScreenSnapshotRawBytesBeforeBase64 = (maxGatewayPayloadBytes / 4) * 3
private let cameraCapture = CameraCaptureService()
private let makeMainActorServices: () async -> any MacNodeRuntimeMainActorServices
private let browserProxyRequest: @Sendable (String?) async throws -> String
@@ -363,15 +365,55 @@ actor MacNodeRuntime {
}
private func handleScreenSnapshotInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse {
let params = (try? Self.decodeParams(MacNodeScreenSnapshotParams.self, from: req.paramsJSON)) ??
MacNodeScreenSnapshotParams()
let params: MacNodeScreenSnapshotParams
if let paramsJSON = req.paramsJSON {
do {
params = try Self.decodeParams(MacNodeScreenSnapshotParams.self, from: paramsJSON)
} catch {
return Self.errorResponse(
req,
code: .invalidRequest,
message: "INVALID_REQUEST: invalid screen snapshot params")
}
} else {
params = MacNodeScreenSnapshotParams()
}
let services = await self.mainActorServices()
let capturedAtMs = Int64(Date().timeIntervalSince1970 * 1000)
let res = try await services.snapshotScreen(
screenIndex: params.screenIndex,
maxWidth: params.maxWidth,
quality: params.quality,
format: params.format)
let res: (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
do {
res = try await services.snapshotScreen(
screenIndex: params.screenIndex,
maxWidth: params.maxWidth,
quality: params.quality,
format: params.format)
} catch let error as ScreenSnapshotService.ScreenSnapshotError {
switch error {
case .noDisplays:
return Self.errorResponse(
req,
code: .invalidRequest,
message: "INVALID_REQUEST: no displays available for screen snapshot")
case let .invalidScreenIndex(idx):
return Self.errorResponse(
req,
code: .invalidRequest,
message: "INVALID_REQUEST: invalid screen index \(idx)")
case .captureFailed, .encodeFailed:
return Self.errorResponse(
req,
code: .unavailable,
message: "UNAVAILABLE: screen snapshot failed")
}
} catch {
return Self.errorResponse(
req,
code: .unavailable,
message: "UNAVAILABLE: screen snapshot failed")
}
if res.data.count > Self.maxScreenSnapshotRawBytesBeforeBase64 {
return Self.screenSnapshotPayloadTooLarge(req)
}
struct ScreenSnapshotPayload: Encodable {
var format: String
var base64: String
@@ -387,6 +429,13 @@ actor MacNodeRuntime {
height: res.height,
screenIndex: params.screenIndex,
capturedAtMs: capturedAtMs))
if try Self.projectedOuterFrameBytes(
forPayloadJSON: payload,
requestId: req.id,
nodeId: req.nodeId) > Self.maxGatewayPayloadBytes
{
return Self.screenSnapshotPayloadTooLarge(req)
}
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
}
@@ -1004,6 +1053,40 @@ extension MacNodeRuntime {
return json
}
/// Estimates the size, in bytes, of a `node.invoke.result` request frame carrying `payloadJSON`.
///
/// A stand-in frame is encoded with a default `JSONEncoder` and its byte count is returned, so
/// the figure reflects whatever escaping the encoder applies when `payloadJSON`, `requestId`
/// and `nodeId` are embedded as JSON string values. The frame's own `id` is a fixed,
/// all-zero UUID-shaped placeholder.
///
/// - Parameters:
///   - payloadJSON: Already-serialized inner payload, embedded as a string value.
///   - requestId: Identifier of the invoke request being answered.
///   - nodeId: Node identifier; `nil` is projected as an empty string.
/// - Returns: Byte count of the encoded stand-in frame.
/// - Throws: Any error raised by `JSONEncoder.encode`.
static func projectedOuterFrameBytes(
    forPayloadJSON payloadJSON: String,
    requestId: String,
    nodeId: String?) throws -> Int
{
    // Property names below are the encoded JSON keys; keep them exactly as-is.
    struct ResultBody: Encodable {
        let id: String
        let nodeId: String
        let ok: Bool
        let payloadJSON: String
    }
    struct ResultEnvelope: Encodable {
        let type = "req"
        let id = "00000000-0000-0000-0000-000000000000"
        let method = "node.invoke.result"
        let params: ResultBody
    }

    let body = ResultBody(
        id: requestId,
        nodeId: nodeId ?? "",
        ok: true,
        payloadJSON: payloadJSON)
    let encoded = try JSONEncoder().encode(ResultEnvelope(params: body))
    return encoded.count
}
/// Builds the `.unavailable` error response used when a screen snapshot is too large to return.
///
/// - Parameter req: The invoke request the error response is built for.
/// - Returns: The result of `errorResponse` with a fixed "payload too large" message.
private static func screenSnapshotPayloadTooLarge(_ req: BridgeInvokeRequest) -> BridgeInvokeResponse {
    let message = "UNAVAILABLE: screen snapshot payload too large; reduce maxWidth or use jpeg"
    return Self.errorResponse(req, code: .unavailable, message: message)
}
/// Reads the canvas-enabled flag from `UserDefaults.standard`.
///
/// - Returns: The stored `Bool` under `canvasEnabledKey`, or `true` when the value is
///   missing or is not a `Bool`.
private nonisolated static func canvasEnabled() -> Bool {
    let stored = UserDefaults.standard.object(forKey: canvasEnabledKey)
    guard let flag = stored as? Bool else {
        return true
    }
    return flag
}

View File

@@ -63,7 +63,7 @@ final class ScreenSnapshotService {
contentFilter: filter,
configuration: config)
} catch {
throw ScreenSnapshotError.captureFailed(error.localizedDescription)
throw ScreenSnapshotError.captureFailed("screen capture failed")
}
let bitmap = NSBitmapImageRep(cgImage: cgImage)

View File

@@ -26,6 +26,78 @@ struct MacNodeRuntimeTests {
}
}
/// Test double for `MacNodeRuntimeMainActorServices` that records `snapshotScreen` calls and
/// answers them with a canned result or a canned error.
@MainActor
final class ScreenSnapshotProbeServices: MacNodeRuntimeMainActorServices, @unchecked Sendable {
    typealias SnapshotResult = (
        data: Data,
        format: OpenClawScreenSnapshotFormat,
        width: Int,
        height: Int)

    /// Number of times `snapshotScreen` has been called.
    var snapshotCallCount = 0
    /// Arguments of the most recent `snapshotScreen` call, repackaged as params.
    var receivedSnapshotParams: MacNodeScreenSnapshotParams?
    /// Value returned by `snapshotScreen` when no error is configured.
    var snapshotResult: SnapshotResult
    /// When non-nil, thrown by `snapshotScreen` instead of returning `snapshotResult`.
    var snapshotError: Error?

    init(
        snapshotResult: SnapshotResult = (Data("ok".utf8), .jpeg, 10, 10),
        snapshotError: Error? = nil)
    {
        self.snapshotError = snapshotError
        self.snapshotResult = snapshotResult
    }

    /// Records the call, then throws the configured error or returns the canned result.
    func snapshotScreen(
        screenIndex: Int?,
        maxWidth: Int?,
        quality: Double?,
        format: OpenClawScreenSnapshotFormat?) async throws -> SnapshotResult
    {
        snapshotCallCount += 1
        receivedSnapshotParams = MacNodeScreenSnapshotParams(
            screenIndex: screenIndex,
            maxWidth: maxWidth,
            quality: quality,
            format: format)
        if let failure = snapshotError {
            throw failure
        }
        return snapshotResult
    }

    /// Writes a tiny placeholder file into the temporary directory and reports it as the recording.
    func recordScreen(
        screenIndex: Int?,
        durationMs: Int?,
        fps: Double?,
        includeAudio: Bool?,
        outPath: String?) async throws -> (path: String, hasAudio: Bool)
    {
        let fileName = "openclaw-test-screen-record-\(UUID().uuidString).mp4"
        let destination = FileManager().temporaryDirectory.appendingPathComponent(fileName)
        try Data("ok".utf8).write(to: destination)
        return (path: destination.path, hasAudio: false)
    }

    func locationAuthorizationStatus() -> CLAuthorizationStatus {
        return .authorizedAlways
    }

    func locationAccuracyAuthorization() -> CLAccuracyAuthorization {
        return .fullAccuracy
    }

    /// Ignores every argument and returns a fixed location at latitude 0, longitude 0.
    func currentLocation(
        desiredAccuracy: OpenClawLocationAccuracy,
        maxAgeMs: Int?,
        timeoutMs: Int?) async throws -> CLLocation
    {
        _ = (desiredAccuracy, maxAgeMs, timeoutMs)
        return CLLocation(latitude: 0, longitude: 0)
    }
}
@Test func `handle invoke rejects unknown command`() async {
let runtime = MacNodeRuntime()
let response = await runtime.handleInvoke(
@@ -298,6 +370,199 @@ struct MacNodeRuntimeTests {
#expect(payload.capturedAtMs <= snapshotCalledAtMs!)
}
/// Truncated JSON params must produce an `invalidRequest` error without `snapshotScreen` being called.
@Test func `handle invoke screen snapshot rejects malformed params before capture`() async throws {
    let probe = await MainActor.run { ScreenSnapshotProbeServices() }
    let runtime = MacNodeRuntime(makeMainActorServices: { probe })
    let malformedRequest = BridgeInvokeRequest(
        id: "req-screen-snapshot-invalid",
        command: MacNodeScreenCommand.snapshot.rawValue,
        paramsJSON: #"{"screenIndex":"#)

    let response = await runtime.handleInvoke(malformedRequest)

    #expect(response.ok == false)
    #expect(response.error?.code == .invalidRequest)
    #expect(response.error?.message == "INVALID_REQUEST: invalid screen snapshot params")

    // The probe must not have been asked for a snapshot at all.
    let captureAttempts = await MainActor.run { probe.snapshotCallCount }
    #expect(captureAttempts == 0)
}
/// A request without `paramsJSON` must succeed and reach the service with default-constructed params.
@Test func `handle invoke screen snapshot keeps nil params as defaults`() async throws {
    let probe = await MainActor.run { ScreenSnapshotProbeServices() }
    let runtime = MacNodeRuntime(makeMainActorServices: { probe })
    let requestWithoutParams = BridgeInvokeRequest(
        id: "req-screen-snapshot-defaults",
        command: MacNodeScreenCommand.snapshot.rawValue)

    let response = await runtime.handleInvoke(requestWithoutParams)

    #expect(response.ok == true)
    let forwarded = await MainActor.run { probe.receivedSnapshotParams }
    #expect(forwarded == MacNodeScreenSnapshotParams())
}
/// An arbitrary error thrown by the snapshot service must surface as the generic
/// `UNAVAILABLE` message rather than the error's own description.
@Test func `handle invoke screen snapshot sanitizes capture failures`() async throws {
    struct SensitiveError: LocalizedError {
        let detail: String
        var errorDescription: String? {
            return detail
        }
    }

    let probe = await MainActor.run {
        ScreenSnapshotProbeServices(
            snapshotError: SensitiveError(detail: "TCC_DENIED display-id=ABC123"))
    }
    let runtime = MacNodeRuntime(makeMainActorServices: { probe })
    let request = BridgeInvokeRequest(
        id: "req-screen-snapshot-error",
        command: MacNodeScreenCommand.snapshot.rawValue)

    let response = await runtime.handleInvoke(request)

    #expect(response.ok == false)
    #expect(response.error?.code == .unavailable)
    #expect(response.error?.message == "UNAVAILABLE: screen snapshot failed")
}
/// `invalidScreenIndex` and `noDisplays` errors from the snapshot service must both be
/// reported with the `invalidRequest` code and their dedicated messages.
@Test func `handle invoke screen snapshot reports validation failures as invalid request`() async throws {
    // Scenario 1: the service reports an out-of-range screen index.
    let badIndexProbe = await MainActor.run {
        ScreenSnapshotProbeServices(
            snapshotError: ScreenSnapshotService.ScreenSnapshotError.invalidScreenIndex(4))
    }
    let badIndexRuntime = MacNodeRuntime(makeMainActorServices: { badIndexProbe })
    let badIndexRequest = BridgeInvokeRequest(
        id: "req-screen-snapshot-bad-index",
        command: MacNodeScreenCommand.snapshot.rawValue)
    let badIndexResponse = await badIndexRuntime.handleInvoke(badIndexRequest)

    #expect(badIndexResponse.ok == false)
    #expect(badIndexResponse.error?.code == .invalidRequest)
    #expect(badIndexResponse.error?.message == "INVALID_REQUEST: invalid screen index 4")

    // Scenario 2: the service reports that no displays exist.
    let displaylessProbe = await MainActor.run {
        ScreenSnapshotProbeServices(snapshotError: ScreenSnapshotService.ScreenSnapshotError.noDisplays)
    }
    let displaylessRuntime = MacNodeRuntime(makeMainActorServices: { displaylessProbe })
    let displaylessRequest = BridgeInvokeRequest(
        id: "req-screen-snapshot-no-displays",
        command: MacNodeScreenCommand.snapshot.rawValue)
    let displaylessResponse = await displaylessRuntime.handleInvoke(displaylessRequest)

    #expect(displaylessResponse.ok == false)
    #expect(displaylessResponse.error?.code == .invalidRequest)
    #expect(
        displaylessResponse.error?.message ==
            "INVALID_REQUEST: no displays available for screen snapshot")
}
/// A snapshot whose raw data is one byte above `(25 MiB / 4) * 3` must be rejected with the
/// "payload too large" error and no payload.
@Test func `handle invoke screen snapshot rejects raw payloads above base64 ceiling`() async throws {
    // (25 MiB / 4) * 3 = 19_660_800; one more byte gives 19_660_801.
    let rawCeiling = (25 * 1024 * 1024 / 4) * 3
    let oversizedCount = rawCeiling + 1
    let probe = await MainActor.run {
        ScreenSnapshotProbeServices(snapshotResult: (
            Data(repeating: 0x41, count: oversizedCount),
            .jpeg,
            4000,
            3000))
    }
    let runtime = MacNodeRuntime(makeMainActorServices: { probe })
    let request = BridgeInvokeRequest(
        id: "req-screen-snapshot-too-large",
        command: MacNodeScreenCommand.snapshot.rawValue)

    let response = await runtime.handleInvoke(request)

    #expect(response.ok == false)
    #expect(response.payloadJSON == nil)
    #expect(response.error?.code == .unavailable)
    #expect(
        response.error?.message ==
            "UNAVAILABLE: screen snapshot payload too large; reduce maxWidth or use jpeg")
}
/// A 12 MiB snapshot made entirely of `0xFF` bytes must be rejected with the
/// "payload too large" error.
@Test func `handle invoke screen snapshot rejects escaped oversized outer frames`() async throws {
    let rawByteCount = 12 * 1024 * 1024
    let probe = await MainActor.run {
        ScreenSnapshotProbeServices(snapshotResult: (
            Data(repeating: 0xFF, count: rawByteCount),
            .png,
            4000,
            3000))
    }
    let runtime = MacNodeRuntime(makeMainActorServices: { probe })
    let request = BridgeInvokeRequest(
        id: "req-screen-snapshot-slash-heavy",
        command: MacNodeScreenCommand.snapshot.rawValue,
        nodeId: "node-slash-heavy")

    let response = await runtime.handleInvoke(request)

    #expect(response.ok == false)
    #expect(response.error?.code == .unavailable)
    #expect(
        response.error?.message ==
            "UNAVAILABLE: screen snapshot payload too large; reduce maxWidth or use jpeg")
}
/// A snapshot just under the raw-size ceiling must succeed, and the projected frame for the
/// returned payload must stay below 25 MiB.
@Test func `handle invoke screen snapshot accepts near-limit frames that fit`() async throws {
    let gatewayLimit = 25 * 1024 * 1024
    let rawByteCount = 19_660_100
    let probe = await MainActor.run {
        ScreenSnapshotProbeServices(snapshotResult: (
            Data(repeating: 0x00, count: rawByteCount),
            .jpeg,
            4000,
            3000))
    }
    let runtime = MacNodeRuntime(makeMainActorServices: { probe })
    let request = BridgeInvokeRequest(
        id: "req-fit",
        command: MacNodeScreenCommand.snapshot.rawValue,
        nodeId: "node-fit")

    let response = await runtime.handleInvoke(request)

    #expect(response.ok == true)
    let acceptedPayload = try #require(response.payloadJSON)
    let projectedSize = try MacNodeRuntime.projectedOuterFrameBytes(
        forPayloadJSON: acceptedPayload,
        requestId: "req-fit",
        nodeId: "node-fit")
    #expect(projectedSize < gatewayLimit)
}
/// Checks that `projectedOuterFrameBytes` matches the byte count of an independently encoded
/// frame of the same shape, and that a node id made of 5 Mi control characters alone pushes
/// the projection above 25 MiB.
@Test func `projected outer frame bytes accounts for dynamic node id escaping`() throws {
    // Local mirror of the frame shape; property names are the encoded JSON keys.
    struct Frame: Encodable {
        let type = "req"
        let id = "00000000-0000-0000-0000-000000000000"
        let method = "node.invoke.result"
        let params: Params
        struct Params: Encodable {
            let id: String
            let nodeId: String
            let ok: Bool
            let payloadJSON: String
        }
    }

    let inner = "{\"format\":\"png\",\"note\":\"\u{0001}\u{0002}\n\t\\\"raw\\\"\",\"width\":1,\"height\":1,\"capturedAtMs\":0}"
    let requestId = "req-control"
    let nodeIdWithControls = "node-\u{0001}\u{0002}\u{0003}\n\t-id"

    let projected = try MacNodeRuntime.projectedOuterFrameBytes(
        forPayloadJSON: inner,
        requestId: requestId,
        nodeId: nodeIdWithControls)
    let referenceFrame = Frame(params: Frame.Params(
        id: requestId,
        nodeId: nodeIdWithControls,
        ok: true,
        payloadJSON: inner))
    let serialized = try JSONEncoder().encode(referenceFrame)
    #expect(projected == serialized.count)

    let controlHeavyNodeId = String(repeating: "\u{0001}", count: 5 * 1024 * 1024)
    let controlHeavyProjection = try MacNodeRuntime.projectedOuterFrameBytes(
        forPayloadJSON: "{}",
        requestId: requestId,
        nodeId: controlHeavyNodeId)
    #expect(controlHeavyProjection > 25 * 1024 * 1024)
}
@Test func `handle invoke browser proxy uses injected request`() async {
let runtime = MacNodeRuntime(browserProxyRequest: { paramsJSON in
#expect(paramsJSON?.contains("/tabs") == true)

View File

@@ -13,12 +13,20 @@ public struct BridgeInvokeRequest: Codable, Sendable {
public let id: String
public let command: String
public let paramsJSON: String?
public let nodeId: String?
public init(type: String = "invoke", id: String, command: String, paramsJSON: String? = nil) {
/// Creates an invoke request by storing each argument in the property of the same name.
///
/// - Parameters:
///   - type: Request type string; defaults to `"invoke"`.
///   - id: Request identifier.
///   - command: Command name.
///   - paramsJSON: Optional params string; defaults to `nil`.
///   - nodeId: Optional node identifier; defaults to `nil`.
public init(
    type: String = "invoke",
    id: String,
    command: String,
    paramsJSON: String? = nil,
    nodeId: String? = nil
) {
    // Optional fields first, then the required ones; the assignments are independent.
    self.nodeId = nodeId
    self.paramsJSON = paramsJSON
    self.command = command
    self.id = id
    self.type = type
}
}

View File

@@ -457,7 +457,8 @@ public actor GatewayNodeSession {
let req = BridgeInvokeRequest(
id: request.id,
command: request.command,
paramsJSON: request.paramsJSON)
paramsJSON: request.paramsJSON,
nodeId: request.nodeId)
self.logger.info("node invoke executing id=\(request.id, privacy: .public)")
let response = await Self.invokeWithTimeout(
request: req,