kinect_v2/
audio_capture.rs1use std::sync::Arc;
2
3use kinect_v2_sys::{
4 DEFAULT_FRAME_WAIT_TIMEOUT_MS, WAITABLE_HANDLE,
5 audio::{AudioBeamFrameList, AudioBeamFrameReader},
6 kinect::{self, KinectSensor},
7};
8use windows::Win32::Foundation::{E_FAIL, WAIT_OBJECT_0, WAIT_TIMEOUT};
9use windows::Win32::System::Threading::WaitForSingleObject;
10use windows::{Win32::Foundation::WAIT_EVENT, core::Error};
11
12pub struct AudioFrameCapture {
17 kinect: KinectSensor, }
19
20impl AudioFrameCapture {
21 pub fn new() -> Result<Self, Error> {
31 let kinect = kinect::get_default_kinect_sensor()?;
32 kinect.open()?;
33
34 Ok(AudioFrameCapture { kinect })
35 }
36
37 pub fn iter(&self) -> Result<AudioFrameCaptureIter, Error> {
48 let audio_source = self.kinect.audio_source()?;
49 let reader = audio_source.open_reader()?;
51 if !audio_source.get_is_active()? {
54 log::warn!("Audio source is not active, cannot subscribe to frame arrived event.");
55 return Err(Error::from_hresult(E_FAIL));
56 }
57
58 let waitable_handle = reader.subscribe_frame_arrived()?;
59 Ok(AudioFrameCaptureIter {
60 reader,
61 waitable_handle,
62 timeout_ms: DEFAULT_FRAME_WAIT_TIMEOUT_MS,
63 })
64 }
65}
66
67pub struct AudioFrameCaptureIter {
72 reader: AudioBeamFrameReader,
73 waitable_handle: WAITABLE_HANDLE,
74 timeout_ms: u32,
75}
76
77impl Drop for AudioFrameCaptureIter {
78 fn drop(&mut self) {
79 if let Err(e) = self.reader.unsubscribe_frame_arrived(self.waitable_handle) {
82 log::warn!("Failed to unsubscribe audio frame arrived event: {e:?}");
83 }
84 }
85}
86
87impl Iterator for AudioFrameCaptureIter {
88 type Item = Result<AudioFrameData, Error>;
89
90 fn next(&mut self) -> Option<Self::Item> {
91 loop {
92 let wait_status: WAIT_EVENT =
93 unsafe { WaitForSingleObject(self.waitable_handle, self.timeout_ms) };
94
95 if wait_status == WAIT_OBJECT_0 {
96 let result = (|| {
99 let event_args = self
100 .reader
101 .get_frame_arrived_event_data(self.waitable_handle)?;
102 let frame_reference = event_args.get_frame_reference()?;
103 let audio_beam_frame_list = frame_reference.acquire_beam_frame()?;
104 AudioFrameData::new(&audio_beam_frame_list)
105 })(); return Some(result);
107 } else if wait_status == WAIT_TIMEOUT {
108 continue;
111 } else {
112 return Some(Err(Error::from_hresult(E_FAIL)));
113 }
114 }
115 }
116}
117
118#[derive(Debug, Clone)]
119pub struct AudioBeamData {
120 pub beam_angle: f32,
121 pub beam_angle_confidence: f32,
122 pub sub_frames: Vec<AudioSubFrameData>,
123}
124
/// Owned copy of a single audio sub-frame: timing, beam estimate and the raw
/// sample bytes.
#[derive(Debug, Clone)]
pub struct AudioSubFrameData {
    /// Duration of the sub-frame (units are not established in this file).
    pub duration: i64,
    /// Beam direction estimate for this sub-frame.
    pub beam_angle: f32,
    /// Confidence reported for `beam_angle`.
    pub beam_angle_confidence: f32,
    /// Relative time of the sub-frame (units are not established in this file).
    pub relative_time: i64,
    /// Raw bytes of the sub-frame's audio buffer.
    pub audio_data: Vec<u8>,
}
133
/// One captured audio frame: timing, beam estimate and the raw audio bytes.
///
/// `data` sits behind an [`Arc`], so cloning a frame shares the byte buffer
/// instead of copying it.
#[derive(Debug, Clone)]
pub struct AudioFrameData {
    /// Relative start time of the frame, stored as an unsigned value.
    pub timestamp: u64,
    /// Beam direction estimate for the frame.
    pub beam_angle: f32,
    /// Confidence reported for `beam_angle`.
    pub beam_angle_confidence: f32,
    /// Raw audio bytes of the frame; the tests in this file decode them as
    /// native-endian 32-bit float samples.
    pub data: Arc<[u8]>,
}
141
142impl AudioFrameData {
143 pub fn new(audio_beam_frame_list: &AudioBeamFrameList) -> Result<Self, Error> {
144 let beam_count = audio_beam_frame_list.get_beam_count()?;
145 assert_eq!(beam_count, 1, "Kinect V2 only supports one audio beam");
146
147 let audio_beam_frame = audio_beam_frame_list.open_audio_beam_frame(0)?;
148 let timestamp = audio_beam_frame.get_relative_time_start()? as u64;
149 let audio_beam = audio_beam_frame.get_audio_beam()?;
150 let beam_angle = audio_beam.get_beam_angle()?;
151 let beam_angle_confidence = audio_beam.get_beam_angle_confidence()?;
152
153 let sub_frame_count = audio_beam_frame.get_sub_frame_count()?;
154 let mut sub_frames = Vec::new();
155
156 for sub_frame_index in 0..sub_frame_count {
157 let audio_sub_frame = audio_beam_frame.get_sub_frame(sub_frame_index)?;
158
159 let duration = audio_sub_frame.get_duration()?;
160 let sub_beam_angle = audio_sub_frame.get_beam_angle()?;
161 let sub_beam_angle_confidence = audio_sub_frame.get_beam_angle_confidence()?;
162 let relative_time = audio_sub_frame.get_relative_time()?;
163
164 let audio_data = audio_sub_frame.access_underlying_buffer()?.to_vec();
166
167 sub_frames.push(AudioSubFrameData {
168 duration,
169 beam_angle: sub_beam_angle,
170 beam_angle_confidence: sub_beam_angle_confidence,
171 relative_time,
172 audio_data,
173 });
174 }
175
176 let mut data = Vec::new();
177 for sub_frame in sub_frames {
178 data.extend_from_slice(&sub_frame.audio_data);
179 }
180
181 Ok(Self {
182 timestamp,
183 beam_angle,
184 beam_angle_confidence,
185 data: Arc::from(data), })
187 }
188}
189
190impl Default for AudioFrameData {
191 fn default() -> Self {
192 Self {
193 timestamp: 0,
194 beam_angle: 0.0,
195 beam_angle_confidence: 0.0,
196 data: Arc::new([]), }
198 }
199}
200
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::anyhow;
    use std::sync::mpsc;

    /// Captures a fixed number of frames on one thread, validates them on
    /// another, and fails if either thread reports an error.
    ///
    /// Opens the default Kinect sensor via `AudioFrameCapture::new`, so it
    /// needs that call to succeed on the machine running the test.
    #[test]
    fn audio_capture_test() -> anyhow::Result<()> {
        let (audio_tx, audio_rx) = mpsc::channel::<AudioFrameData>();
        let max_frames_to_capture = 10;
        let audio_capture_thread = std::thread::spawn(move || -> anyhow::Result<()> {
            let audio_capture = AudioFrameCapture::new()?;
            // `take` stops after the last wanted frame without first blocking
            // on one extra frame, which `enumerate` + `break` would do.
            for frame in audio_capture.iter()?.take(max_frames_to_capture) {
                let data = frame.map_err(|e| anyhow!("Error capturing audio frame: {}", e))?;
                if audio_tx.send(data).is_err() {
                    // Receiver is gone; nothing left to deliver to.
                    break;
                }
            }
            Ok(())
        });

        let processing_thread = std::thread::spawn(move || -> anyhow::Result<()> {
            // The receiver iterator ends when the sender is dropped, so this
            // loop stops early if the capture thread finishes or fails.
            for frame_data in audio_rx.iter().take(max_frames_to_capture) {
                println!(
                    "Received audio frame: timestamp: {}, beam_angle: {}, confidence: {}, data_len: {}",
                    frame_data.timestamp,
                    frame_data.beam_angle,
                    frame_data.beam_angle_confidence,
                    frame_data.data.len()
                );
                anyhow::ensure!(!frame_data.data.is_empty(), "Audio data is empty");
                anyhow::ensure!(frame_data.timestamp > 0, "Timestamp is not positive");
            }
            Ok(())
        });

        audio_capture_thread
            .join()
            .map_err(|e| anyhow!("Audio capture thread join error: {:?}", e))??;
        processing_thread
            .join()
            .map_err(|e| anyhow!("Processing thread join error: {:?}", e))??;
        Ok(())
    }

    /// Records a fixed number of frames into `test_audio.wav` as mono,
    /// 16 kHz, 32-bit float samples.
    ///
    /// Opens the default Kinect sensor via `AudioFrameCapture::new`, so it
    /// needs that call to succeed on the machine running the test.
    #[test]
    fn record_wav_audio_test() -> anyhow::Result<()> {
        const FRAMES_TO_RECORD: usize = 512;

        let wav_spec = hound::WavSpec {
            channels: 1,
            sample_rate: 16000,
            bits_per_sample: 32,
            sample_format: hound::SampleFormat::Float,
        };
        let mut wav_writer = hound::WavWriter::create("test_audio.wav", wav_spec)?;

        let (audio_tx, audio_rx) = mpsc::channel::<AudioFrameData>();
        let capture_thread = std::thread::spawn(move || -> anyhow::Result<()> {
            let audio_capture = AudioFrameCapture::new()?;
            for frame in audio_capture.iter()? {
                match frame {
                    Ok(data) => {
                        // Stop quietly once the receiver has been dropped
                        // instead of panicking on a failed send.
                        if audio_tx.send(data).is_err() {
                            break;
                        }
                    }
                    Err(e) => {
                        eprintln!("Error capturing audio frame: {e}");
                    }
                }
            }
            Ok(())
        });

        println!("Starting audio capture...");
        let mut frames_written = 0usize;
        // The receiver iterator ends when the sender is dropped, so a capture
        // thread that dies early ends this loop instead of spinning forever
        // on a disconnected channel.
        for frame in audio_rx.iter().take(FRAMES_TO_RECORD) {
            for chunk in frame.data.chunks_exact(4) {
                let sample = f32::from_ne_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
                wav_writer.write_sample(sample)?;
            }
            frames_written += 1;
        }
        // Dropping the receiver makes the capture thread's next send fail,
        // which ends its loop.
        drop(audio_rx);

        if frames_written < FRAMES_TO_RECORD {
            // The channel closed early, so the capture thread has dropped its
            // sender and is finishing; joining surfaces its error or panic.
            capture_thread
                .join()
                .map_err(|e| anyhow!("Audio capture thread join error: {:?}", e))??;
            anyhow::bail!(
                "Audio capture stopped after {} of {} frames",
                frames_written,
                FRAMES_TO_RECORD
            );
        }

        wav_writer.finalize()?;
        println!("Audio data written to test_audio.wav");

        Ok(())
    }
}