kinect_v2/
audio_capture.rs

1use std::sync::Arc;
2
3use kinect_v2_sys::{
4    DEFAULT_FRAME_WAIT_TIMEOUT_MS, WAITABLE_HANDLE,
5    audio::{AudioBeamFrameList, AudioBeamFrameReader},
6    kinect::{self, KinectSensor},
7};
8use windows::Win32::Foundation::{E_FAIL, WAIT_OBJECT_0, WAIT_TIMEOUT};
9use windows::Win32::System::Threading::WaitForSingleObject;
10use windows::{Win32::Foundation::WAIT_EVENT, core::Error};
11
12/// Manages the Kinect sensor and provides access to audio frame data.
13///
14/// This struct is responsible for initializing and holding the necessary Kinect
15/// resources to capture audio frames from audio beams.
16pub struct AudioFrameCapture {
17    kinect: KinectSensor, // keep the kinect sensor instance alive.
18}
19
20impl AudioFrameCapture {
21    /// Creates a new `AudioFrameCapture` instance.
22    ///
23    /// This function initializes the default Kinect sensor, opens it,
24    /// and sets up the audio source and audio beam frame reader.
25    ///
26    /// # Errors
27    ///
28    /// Returns an error if the Kinect sensor cannot be initialized,
29    /// opened, or if the audio source is not active.
30    pub fn new() -> Result<Self, Error> {
31        let kinect = kinect::get_default_kinect_sensor()?;
32        kinect.open()?;
33
34        Ok(AudioFrameCapture { kinect })
35    }
36
37    /// Returns an iterator over audio frames.
38    ///
39    /// The iterator will block waiting for new frames. Each item yielded by
40    /// the iterator is a `Result<AudioFrameData, Error>`, allowing for error
41    /// handling during frame acquisition.
42    ///
43    /// # Errors
44    ///
45    /// Returns an error if it fails to subscribe to the frame arrived event,
46    /// which is necessary for the iterator to function.
47    pub fn iter(&self) -> Result<AudioFrameCaptureIter, Error> {
48        let audio_source = self.kinect.audio_source()?;
49        // Open the reader to active the source.
50        let reader = audio_source.open_reader()?;
51        // Ensure the audio source is active.
52        // If not, event subscription and frame acquisition might fail.
53        if !audio_source.get_is_active()? {
54            log::warn!("Audio source is not active, cannot subscribe to frame arrived event.");
55            return Err(Error::from_hresult(E_FAIL));
56        }
57
58        let waitable_handle = reader.subscribe_frame_arrived()?;
59        Ok(AudioFrameCaptureIter {
60            reader,
61            waitable_handle,
62            timeout_ms: DEFAULT_FRAME_WAIT_TIMEOUT_MS,
63        })
64    }
65}
66
67/// An iterator that yields audio frames from a Kinect sensor.
68///
69/// This iterator blocks until a new frame is available or an error occurs.
70/// It is created by calling the `iter` method on `AudioFrameCapture`.
71pub struct AudioFrameCaptureIter {
72    reader: AudioBeamFrameReader,
73    waitable_handle: WAITABLE_HANDLE,
74    timeout_ms: u32,
75}
76
77impl Drop for AudioFrameCaptureIter {
78    fn drop(&mut self) {
79        // Best effort to unsubscribe from the frame arrived event.
80        // Errors in `drop` are typically logged or ignored, as panicking in drop is problematic.
81        if let Err(e) = self.reader.unsubscribe_frame_arrived(self.waitable_handle) {
82            log::warn!("Failed to unsubscribe audio frame arrived event: {e:?}");
83        }
84    }
85}
86
87impl Iterator for AudioFrameCaptureIter {
88    type Item = Result<AudioFrameData, Error>;
89
90    fn next(&mut self) -> Option<Self::Item> {
91        loop {
92            let wait_status: WAIT_EVENT =
93                unsafe { WaitForSingleObject(self.waitable_handle, self.timeout_ms) };
94
95            if wait_status == WAIT_OBJECT_0 {
96                // Frame event was signaled.
97                // Use a closure and the `?` operator for cleaner error handling.
98                let result = (|| {
99                    let event_args = self
100                        .reader
101                        .get_frame_arrived_event_data(self.waitable_handle)?;
102                    let frame_reference = event_args.get_frame_reference()?;
103                    let audio_beam_frame_list = frame_reference.acquire_beam_frame()?;
104                    AudioFrameData::new(&audio_beam_frame_list)
105                })(); // Immediately invoke the closure
106                return Some(result);
107            } else if wait_status == WAIT_TIMEOUT {
108                // No new frame arrived within the timeout period.
109                // Continue waiting as this is a blocking iterator.
110                continue;
111            } else {
112                return Some(Err(Error::from_hresult(E_FAIL)));
113            }
114        }
115    }
116}
117
118#[derive(Debug, Clone)]
119pub struct AudioBeamData {
120    pub beam_angle: f32,
121    pub beam_angle_confidence: f32,
122    pub sub_frames: Vec<AudioSubFrameData>,
123}
124
/// Owned copy of the values read from a single audio sub-frame.
#[derive(Debug, Clone)]
pub struct AudioSubFrameData {
    /// Duration of the sub-frame, as a raw `TIMESPAN` value.
    pub duration: i64,
    /// Beam angle reported for this sub-frame.
    pub beam_angle: f32,
    /// Confidence value reported for `beam_angle`.
    pub beam_angle_confidence: f32,
    /// Relative time of the sub-frame, as a raw `TIMESPAN` value.
    pub relative_time: i64,
    /// Raw bytes copied out of the sub-frame's underlying buffer.
    pub audio_data: Vec<u8>,
}
133
/// One captured audio frame: beam metadata plus the raw audio bytes.
///
/// Cloning is cheap because the audio bytes live behind an `Arc` and are
/// shared between clones rather than copied.
#[derive(Debug, Clone)]
pub struct AudioFrameData {
    /// Relative start time of the beam frame, converted to `u64`.
    pub timestamp: u64,
    /// Angle of the audio beam the frame was captured from.
    pub beam_angle: f32,
    /// Confidence value reported for `beam_angle`.
    pub beam_angle_confidence: f32,
    /// Bytes of all sub-frame buffers of the frame, concatenated in order.
    pub data: Arc<[u8]>,
}
141
142impl AudioFrameData {
143    pub fn new(audio_beam_frame_list: &AudioBeamFrameList) -> Result<Self, Error> {
144        let beam_count = audio_beam_frame_list.get_beam_count()?;
145        assert_eq!(beam_count, 1, "Kinect V2 only supports one audio beam");
146
147        let audio_beam_frame = audio_beam_frame_list.open_audio_beam_frame(0)?;
148        let timestamp = audio_beam_frame.get_relative_time_start()? as u64;
149        let audio_beam = audio_beam_frame.get_audio_beam()?;
150        let beam_angle = audio_beam.get_beam_angle()?;
151        let beam_angle_confidence = audio_beam.get_beam_angle_confidence()?;
152
153        let sub_frame_count = audio_beam_frame.get_sub_frame_count()?;
154        let mut sub_frames = Vec::new();
155
156        for sub_frame_index in 0..sub_frame_count {
157            let audio_sub_frame = audio_beam_frame.get_sub_frame(sub_frame_index)?;
158
159            let duration = audio_sub_frame.get_duration()?;
160            let sub_beam_angle = audio_sub_frame.get_beam_angle()?;
161            let sub_beam_angle_confidence = audio_sub_frame.get_beam_angle_confidence()?;
162            let relative_time = audio_sub_frame.get_relative_time()?;
163
164            // Get audio data from the sub-frame
165            let audio_data = audio_sub_frame.access_underlying_buffer()?.to_vec();
166
167            sub_frames.push(AudioSubFrameData {
168                duration,
169                beam_angle: sub_beam_angle,
170                beam_angle_confidence: sub_beam_angle_confidence,
171                relative_time,
172                audio_data,
173            });
174        }
175
176        let mut data = Vec::new();
177        for sub_frame in sub_frames {
178            data.extend_from_slice(&sub_frame.audio_data);
179        }
180
181        Ok(Self {
182            timestamp,
183            beam_angle,
184            beam_angle_confidence,
185            data: Arc::from(data), // Use Arc to avoid expensive cloning
186        })
187    }
188}
189
190impl Default for AudioFrameData {
191    fn default() -> Self {
192        Self {
193            timestamp: 0,
194            beam_angle: 0.0,
195            beam_angle_confidence: 0.0,
196            data: Arc::new([]), // Use an empty Arc<[u8]> for default
197        }
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::anyhow;
    use std::sync::mpsc;

    /// Captures a few frames on one thread and validates them on another.
    ///
    /// Needs a Kinect sensor to be available; without one the capture thread
    /// returns an error and the test fails.
    #[test]
    fn audio_capture_test() -> anyhow::Result<()> {
        let (audio_tx, audio_rx) = mpsc::channel::<AudioFrameData>();
        let max_frames_to_capture: usize = 10;
        let audio_capture_thread = std::thread::spawn(move || -> anyhow::Result<()> {
            let audio_capture = AudioFrameCapture::new()?;
            // `take` ends the loop right after the last wanted frame instead
            // of blocking on one extra frame just to discard it.
            for frame in audio_capture.iter()?.take(max_frames_to_capture) {
                let data = frame.map_err(|e| anyhow!("Error capturing audio frame: {}", e))?;
                if audio_tx.send(data).is_err() {
                    // The receiver is gone; nobody is interested anymore.
                    break;
                }
            }
            Ok(())
        });

        let processing_thread = std::thread::spawn(move || -> anyhow::Result<()> {
            for _ in 0..max_frames_to_capture {
                let frame_data = match audio_rx.recv() {
                    Ok(data) => data,
                    // Sender dropped: the capture thread finished or failed;
                    // its own result is checked when it is joined.
                    Err(_) => break,
                };
                println!(
                    "Received audio frame: timestamp: {}, beam_angle: {}, confidence: {}, data_len: {}",
                    frame_data.timestamp,
                    frame_data.beam_angle,
                    frame_data.beam_angle_confidence,
                    frame_data.data.len()
                );
                anyhow::ensure!(!frame_data.data.is_empty(), "Audio data is empty");
                anyhow::ensure!(frame_data.timestamp > 0, "Timestamp is not positive");
            }
            Ok(())
        });

        audio_capture_thread
            .join()
            .map_err(|e| anyhow!("Audio capture thread join error: {:?}", e))??;
        processing_thread
            .join()
            .map_err(|e| anyhow!("Processing thread join error: {:?}", e))??;
        Ok(())
    }

    /// Records a fixed number of frames into `test_audio.wav`.
    ///
    /// Needs a Kinect sensor to be available. If the capture thread stops
    /// before enough frames were received, the test fails instead of waiting
    /// forever.
    #[test]
    fn record_wav_audio_test() -> anyhow::Result<()> {
        // Limit the recording to 512 frames for testing.
        const FRAMES_TO_RECORD: usize = 512;

        let wav_spec = hound::WavSpec {
            channels: 1,
            sample_rate: 16000,  // Kinect's audio sample rate
            bits_per_sample: 32, // 32-bit float samples
            sample_format: hound::SampleFormat::Float,
        };
        let mut wav_writer = hound::WavWriter::create("test_audio.wav", wav_spec)?;

        let (audio_tx, audio_rx) = mpsc::channel::<AudioFrameData>();
        let capture_thread = std::thread::spawn(move || -> anyhow::Result<()> {
            let audio_capture = AudioFrameCapture::new()?;
            for frame in audio_capture.iter()? {
                match frame {
                    Ok(data) => {
                        if audio_tx.send(data).is_err() {
                            // The receiver is gone: the recording is done.
                            break;
                        }
                    }
                    Err(e) => {
                        eprintln!("Error capturing audio frame: {e}");
                    }
                }
            }
            Ok(())
        });

        println!("Starting audio capture...");
        for _ in 0..FRAMES_TO_RECORD {
            let frame = match audio_rx.recv() {
                Ok(frame) => frame,
                Err(_) => {
                    // The sender was dropped, so the capture thread is gone
                    // and no further frame can arrive. Surface its error (or
                    // panic) rather than retrying `recv` forever.
                    capture_thread
                        .join()
                        .map_err(|e| anyhow!("Audio capture thread join error: {:?}", e))??;
                    anyhow::bail!(
                        "Audio capture thread stopped before {FRAMES_TO_RECORD} frames were recorded"
                    );
                }
            };

            // Interpret as 32-bit float samples (Kinect's native format) and
            // write each sample as soon as it is decoded.
            for chunk in frame.data.chunks_exact(4) {
                let sample = f32::from_ne_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
                wav_writer.write_sample(sample)?;
            }
        }

        wav_writer.finalize()?;
        println!("Audio data written to test_audio.wav");

        Ok(())
    }
}
306}