diff --git a/CHANGELOG.md b/CHANGELOG.md index d5099a84edf..c006332a433 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai - WhatsApp: close long-lived web sockets through Baileys `end(error)` before falling back to raw websocket close, so listener teardown runs Baileys cleanup instead of leaving zombie sockets. Fixes #52442. Thanks @essendigitalgroup-cyber. - Twitch/plugins: emit a flat JSON Schema for Twitch channel config so single-account and multi-account configs validate before runtime load, and add source-checkout diagnostics for missing pnpm workspace dependencies. Thanks @vincentkoc. - Gateway/sessions: move hot transcript reads and mirror appends onto async bounded IO with serialized parent-linked writes, keeping large session histories from stalling Gateway requests and channel replies. Fixes #75656. Thanks @DerFlash. +- macOS/Talk Mode: downmix multi-channel microphone buffers before handing them to Apple Speech across Push-to-Talk, Talk Mode, Voice Wake, and the wake-word tester, so pro audio interfaces no longer produce empty transcripts. Fixes #42533. Thanks @jbuecker. - macOS/Talk Mode: subscribe native WebChat to active-session transcript updates and render external spoken user turns in the chat thread instead of only showing assistant replies. Fixes #75155. Thanks @SledderBling. - macOS/Voice Wake: accept trigger-only phrases in the built-in Voice Wake test, matching the settings UI and runtime trigger-only path instead of requiring extra command text after the wake word. Fixes #64986. Thanks @zoiks65. - Cron/TTS: run cron announce payloads through the normal TTS directive transform before outbound delivery, so scheduled `[[tts]]` replies generate voice payloads instead of leaking raw tags. Fixes #52125. Thanks @kenchen3000. 
diff --git a/apps/macos/Sources/OpenClaw/SpeechAudioBufferNormalizer.swift b/apps/macos/Sources/OpenClaw/SpeechAudioBufferNormalizer.swift new file mode 100644 index 00000000000..ac3ed4d36fd --- /dev/null +++ b/apps/macos/Sources/OpenClaw/SpeechAudioBufferNormalizer.swift @@ -0,0 +1,86 @@ +@preconcurrency import AVFoundation + +enum SpeechAudioBufferNormalizer { + static func speechCompatibleBuffer(from buffer: AVAudioPCMBuffer) -> AVAudioPCMBuffer { + let format = buffer.format + guard format.channelCount > 2, format.sampleRate > 0 else { + return buffer + } + return self.downmixFloatBuffer(buffer) ?? self.convertBuffer(buffer) ?? buffer + } + + private static func downmixFloatBuffer(_ buffer: AVAudioPCMBuffer) -> AVAudioPCMBuffer? { + let format = buffer.format + guard format.commonFormat == .pcmFormatFloat32, + !format.isInterleaved, + let source = buffer.floatChannelData, + let targetFormat = AVAudioFormat( + commonFormat: .pcmFormatFloat32, + sampleRate: format.sampleRate, + channels: 1, + interleaved: false), + let output = AVAudioPCMBuffer( + pcmFormat: targetFormat, + frameCapacity: buffer.frameCapacity), + let target = output.floatChannelData?[0] + else { + return nil + } + + output.frameLength = buffer.frameLength + let channelCount = Int(format.channelCount) + let frameCount = Int(buffer.frameLength) + guard channelCount > 0, frameCount > 0 else { return output } + + let scale = 1.0 / Float(channelCount) + for frame in 0..<frameCount { + var sum: Float = 0 + for channel in 0..<channelCount { + sum += source[channel][frame] + } + target[frame] = sum * scale + } + return output + } + + private static func convertBuffer(_ buffer: AVAudioPCMBuffer) -> AVAudioPCMBuffer?
{ + guard let targetFormat = AVAudioFormat( + commonFormat: .pcmFormatFloat32, + sampleRate: buffer.format.sampleRate, + channels: 1, + interleaved: false), + let converter = AVAudioConverter(from: buffer.format, to: targetFormat) + else { + return nil + } + + let frameCapacity = AVAudioFrameCount( + max(1, ceil(Double(buffer.frameLength) * targetFormat.sampleRate / buffer.format.sampleRate))) + guard let output = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: frameCapacity) else { + return nil + } + + let input = ConverterInput(buffer) + var error: NSError? + let status = converter.convert(to: output, error: &error) { _, outStatus in + if input.didProvide { + outStatus.pointee = .noDataNow + return nil + } + input.didProvide = true + outStatus.pointee = .haveData + return input.buffer + } + guard status != .error else { return nil } + return output + } + + private final class ConverterInput: @unchecked Sendable { + let buffer: AVAudioPCMBuffer + var didProvide = false + + init(_ buffer: AVAudioPCMBuffer) { + self.buffer = buffer + } + } +} diff --git a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift index 8ec32302138..d61e0f1b034 100644 --- a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift +++ b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift @@ -225,7 +225,7 @@ actor TalkModeRuntime { input.removeTap(onBus: 0) let meter = self.rmsMeter input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request, meter] buffer, _ in - request?.append(buffer) + request?.append(SpeechAudioBufferNormalizer.speechCompatibleBuffer(from: buffer)) if let rms = Self.rmsLevel(buffer: buffer) { meter.set(rms) } diff --git a/apps/macos/Sources/OpenClaw/VoicePushToTalk.swift b/apps/macos/Sources/OpenClaw/VoicePushToTalk.swift index 872dcc224a6..efc6eaa7eec 100644 --- a/apps/macos/Sources/OpenClaw/VoicePushToTalk.swift +++ b/apps/macos/Sources/OpenClaw/VoicePushToTalk.swift @@ -260,9 +260,9 @@ actor 
VoicePushToTalk { input.removeTap(onBus: 0) self.tapInstalled = false } - // Pipe raw mic buffers into the Speech request while the chord is held. + // Pipe Speech-compatible mic buffers into the request while the chord is held. input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in - request?.append(buffer) + request?.append(SpeechAudioBufferNormalizer.speechCompatibleBuffer(from: buffer)) } self.tapInstalled = true diff --git a/apps/macos/Sources/OpenClaw/VoiceWakeRuntime.swift b/apps/macos/Sources/OpenClaw/VoiceWakeRuntime.swift index 3db6a92146f..644a9bd0717 100644 --- a/apps/macos/Sources/OpenClaw/VoiceWakeRuntime.swift +++ b/apps/macos/Sources/OpenClaw/VoiceWakeRuntime.swift @@ -187,7 +187,7 @@ actor VoiceWakeRuntime { } input.removeTap(onBus: 0) input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak self, weak request] buffer, _ in - request?.append(buffer) + request?.append(SpeechAudioBufferNormalizer.speechCompatibleBuffer(from: buffer)) guard let rms = Self.rmsLevel(buffer: buffer) else { return } Task.detached { [weak self] in await self?.noteAudioLevel(rms: rms) diff --git a/apps/macos/Sources/OpenClaw/VoiceWakeTester.swift b/apps/macos/Sources/OpenClaw/VoiceWakeTester.swift index 2eff26b7765..9971b0b9cd1 100644 --- a/apps/macos/Sources/OpenClaw/VoiceWakeTester.swift +++ b/apps/macos/Sources/OpenClaw/VoiceWakeTester.swift @@ -116,7 +116,7 @@ final class VoiceWakeTester { } inputNode.removeTap(onBus: 0) inputNode.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in - request?.append(buffer) + request?.append(SpeechAudioBufferNormalizer.speechCompatibleBuffer(from: buffer)) } engine.prepare() diff --git a/apps/macos/Sources/OpenClaw/VoiceWakeTextUtils.swift b/apps/macos/Sources/OpenClaw/VoiceWakeTextUtils.swift index d2f2aaf017b..89652cd52bf 100644 --- a/apps/macos/Sources/OpenClaw/VoiceWakeTextUtils.swift +++ b/apps/macos/Sources/OpenClaw/VoiceWakeTextUtils.swift @@ -145,6 
+145,7 @@ enum VoiceWakeTextUtils { || self.hasOnlyFillerBeforeTrigger(transcript: transcript, triggers: triggers) else { return nil } let trimmed = trimWake(transcript, triggers) + guard !self.isFillerOnly(trimmed) else { return nil } guard trimmed.count >= minCommandLength else { return nil } return trimmed } @@ -159,7 +160,8 @@ enum VoiceWakeTextUtils { self.startsWithTrigger(transcript: transcript, triggers: triggers) || self.hasOnlyFillerBeforeTrigger(transcript: transcript, triggers: triggers) else { return false } - return trimWake(transcript, triggers).isEmpty + let trimmed = trimWake(transcript, triggers) + return trimmed.isEmpty || self.isFillerOnly(trimmed) } static func hasOnlyFillerBeforeTrigger(transcript: String, triggers: [String]) -> Bool { @@ -173,6 +175,16 @@ enum VoiceWakeTextUtils { return prefixTokens.allSatisfy { self.wakePrefixFillers.contains($0) } } + private static func isFillerOnly(_ text: String) -> Bool { + let tokens = text + .split(whereSeparator: { + $0.isWhitespace || self.whitespaceAndPunctuation.contains($0.unicodeScalars.first!) + }) + .map { self.normalizeToken(String($0)) } + .filter { !$0.isEmpty } + return !tokens.isEmpty && tokens.allSatisfy { self.wakePrefixFillers.contains($0) } + } + static func matchedTriggerWord(transcript: String, triggers: [String]) -> String? 
{ if let rawMatch = self.bestRawTriggerMatch(transcript: transcript, triggers: triggers) { return rawMatch.normalizedTrigger diff --git a/apps/macos/Tests/OpenClawIPCTests/VoicePushToTalkTests.swift b/apps/macos/Tests/OpenClawIPCTests/VoicePushToTalkTests.swift index aeb1d700474..7784b009a40 100644 --- a/apps/macos/Tests/OpenClawIPCTests/VoicePushToTalkTests.swift +++ b/apps/macos/Tests/OpenClawIPCTests/VoicePushToTalkTests.swift @@ -1,7 +1,50 @@ +import AVFoundation import Testing @testable import OpenClaw struct VoicePushToTalkTests { + @Test func `speech normalizer passes through mono buffers`() throws { + let format = try #require(AVAudioFormat( + commonFormat: .pcmFormatFloat32, + sampleRate: 16_000, + channels: 1, + interleaved: false)) + let buffer = try #require(AVAudioPCMBuffer(pcmFormat: format, frameCapacity: 4)) + buffer.frameLength = 4 + + let normalized = SpeechAudioBufferNormalizer.speechCompatibleBuffer(from: buffer) + + #expect(normalized === buffer) + } + + @Test func `speech normalizer downmixes multichannel float buffers to mono`() throws { + var layout = AudioChannelLayout() + layout.mChannelLayoutTag = kAudioChannelLayoutTag_Quadraphonic + let channelLayout = AVAudioChannelLayout(layout: &layout) + let format = AVAudioFormat( + commonFormat: .pcmFormatFloat32, + sampleRate: 16_000, + interleaved: false, + channelLayout: channelLayout) + let buffer = try #require(AVAudioPCMBuffer(pcmFormat: format, frameCapacity: 2)) + buffer.frameLength = 2 + let channels = try #require(buffer.floatChannelData) + for frame in 0..<2 { + channels[0][frame] = 1 + channels[1][frame] = 3 + channels[2][frame] = 5 + channels[3][frame] = 7 + } + + let normalized = SpeechAudioBufferNormalizer.speechCompatibleBuffer(from: buffer) + + #expect(normalized.format.channelCount == 1) + #expect(normalized.frameLength == 2) + let output = try #require(normalized.floatChannelData?[0]) + #expect(output[0] == 4) + #expect(output[1] == 4) + } + @Test func `delta trims committed 
prefix`() { let delta = VoicePushToTalk._testDelta(committed: "hello ", current: "hello world again") #expect(delta == "world again")