whisp/
audio.rs

1//! Audio recording module for whisp.
2//!
//! This module provides audio recording functionality using the system's
//! default input device. It's platform-agnostic and uses channels for
//! event communication instead of depending on any specific UI framework.
6//!
7//! ## Format notes
8//!
9//! WAV format uses ~467KiB every 5 seconds, hitting the 25MiB API limit
10//! in about 4m30s. This is sufficient for most dictation use cases.
11
12use std::io::{self, Cursor, Seek, SeekFrom, Write};
13use std::sync::mpsc::Sender;
14use std::sync::{Arc, Mutex};
15use std::time::Duration;
16
17use cpal::Host;
18use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
19use hound::{WavSpec, WavWriter};
20use thiserror::Error;
21use tracing::{error, info};
22
23use crate::core::{AudioEvent, MicState, RecordingState};
24
25/// Errors that can occur during recording.
26#[derive(Debug, Error)]
27pub enum RecorderError {
28    #[error(transparent)]
29    Anyhow(#[from] anyhow::Error),
30
31    #[error("no input device available")]
32    NoInputDevice,
33
34    #[error("sample format not supported: {0}")]
35    SampleFormatNotSupported(String),
36
37    #[error(transparent)]
38    BuildStream(#[from] cpal::BuildStreamError),
39}
40
41pub type Result<T> = std::result::Result<T, RecorderError>;
42
43type WavWriterHandle = Arc<Mutex<Option<WavWriter<MemoryWriter>>>>;
44
45/// A cheaply cloneable handle to the recording buffer.
46#[derive(Clone)]
47struct MemoryWriter {
48    inner: Arc<Mutex<Cursor<Vec<u8>>>>,
49}
50
51impl MemoryWriter {
52    fn new() -> Self {
53        Self {
54            inner: Arc::new(Mutex::new(Cursor::new(Vec::with_capacity(8 * 1024)))),
55        }
56    }
57
58    fn try_into_inner(self) -> Result<Vec<u8>> {
59        let owned = Arc::try_unwrap(self.inner).map_err(|_| {
60            RecorderError::Anyhow(anyhow::anyhow!(
61                "Failed to unwrap inner Arc in MemoryWriter"
62            ))
63        })?;
64        let cursor = owned.into_inner().unwrap();
65        Ok(cursor.into_inner())
66    }
67}
68
69impl Seek for MemoryWriter {
70    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
71        self.inner.lock().unwrap().seek(pos)
72    }
73}
74
75impl Write for MemoryWriter {
76    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
77        self.inner.lock().unwrap().write(buf)
78    }
79
80    fn flush(&mut self) -> io::Result<()> {
81        self.inner.lock().unwrap().flush()
82    }
83}
84
85/// Audio recorder using the system's default input device.
86pub struct Recorder {
87    host: Host,
88}
89
90impl Default for Recorder {
91    fn default() -> Self {
92        Self::new()
93    }
94}
95
96impl Recorder {
97    /// Create a new recorder.
98    pub fn new() -> Self {
99        Self {
100            host: cpal::default_host(),
101        }
102    }
103
104    /// Start recording audio.
105    ///
106    /// The `event_sender` is used to notify when the mic becomes active
107    /// (receives non-silent audio). Pass `None` if you don't need events.
108    pub fn start_recording(
109        &self,
110        event_sender: Option<Sender<AudioEvent>>,
111    ) -> Result<RecordingHandle> {
112        let device = self
113            .host
114            .default_input_device()
115            .ok_or(RecorderError::NoInputDevice)?;
116        let config = device
117            .default_input_config()
118            .map_err(|_| RecorderError::NoInputDevice)?;
119
120        info!(
121            device_name = %device.name().unwrap_or_default(),
122            config = ?config,
123            "Recording from device"
124        );
125
126        let spec = wav_spec_from_config(&config);
127
128        let buffer = MemoryWriter::new();
129        let writer =
130            WavWriter::new(buffer.clone(), spec).map_err(|e| RecorderError::Anyhow(e.into()))?;
131        let writer = Arc::new(Mutex::new(Some(writer)));
132
133        let writer_2 = writer.clone();
134
135        let err_fn = move |err| {
136            error!("an error occurred on stream: {}", err);
137        };
138
139        let mut state = RecordingState::default();
140
141        let stream = match config.sample_format() {
142            cpal::SampleFormat::F32 => device.build_input_stream(
143                &config.into(),
144                move |data, _: &_| write_data(&mut state, data, &writer_2, &event_sender),
145                err_fn,
146                None,
147            )?,
148            sample_format => {
149                return Err(RecorderError::SampleFormatNotSupported(format!(
150                    "{:?}",
151                    sample_format
152                )));
153            }
154        };
155
156        stream
157            .play()
158            .map_err(|_| anyhow::anyhow!("failed to play stream"))?;
159
160        Ok(RecordingHandle {
161            stream,
162            writer,
163            buffer: Some(buffer),
164            spec,
165        })
166    }
167}
168
/// Handle to an active recording.
///
/// Call `finish()` to stop recording and retrieve the audio data.
/// If dropped without calling `finish()`, the recording will be finalized
/// but you won't be able to retrieve the data.
pub struct RecordingHandle {
    /// Live cpal input stream; `finish()` pauses it before finalizing.
    stream: cpal::Stream,
    /// Shared WAV encoder slot. The stream callback writes samples into it and
    /// `finish()` takes the encoder out in order to finalize it.
    writer: WavWriterHandle,
    /// Handle to the in-memory WAV bytes; `None` once `finish()` has run.
    buffer: Option<MemoryWriter>,
    /// WAV format of the recording, copied into the resulting `Recording`.
    spec: WavSpec,
}
180
181/// A completed recording with audio data.
182pub struct Recording {
183    data: Vec<u8>,
184    spec: WavSpec,
185}
186
187impl Recording {
188    /// Get the raw audio data (WAV format).
189    pub fn data(&self) -> &[u8] {
190        &self.data
191    }
192
193    /// Get the WAV specification.
194    pub fn spec(&self) -> &WavSpec {
195        &self.spec
196    }
197
198    /// Get the number of samples in the recording.
199    pub fn samples(&self) -> u64 {
200        self.data.len() as u64 / (self.spec.bits_per_sample / 8) as u64
201    }
202
203    /// Get the duration of the recording.
204    pub fn duration(&self) -> Duration {
205        let num_samples = self.samples();
206        let duration = num_samples as f64 / self.spec.sample_rate as f64;
207        Duration::from_secs_f64(duration)
208    }
209
210    /// Consume the recording and return the raw data.
211    pub fn into_data(self) -> Vec<u8> {
212        self.data
213    }
214}
215
216impl RecordingHandle {
217    /// Finish the recording and return the audio data.
218    pub fn finish(&mut self) -> Result<Option<Recording>> {
219        if self.buffer.is_none() {
220            return Ok(None);
221        }
222
223        info!("ending recording");
224        let buffer = self.buffer.take().unwrap();
225
226        self.stream.pause().ok();
227
228        self.writer
229            .lock()
230            .unwrap()
231            .take()
232            .unwrap()
233            .finalize()
234            .map_err(|e| {
235                RecorderError::Anyhow(anyhow::anyhow!("Failed to finalize writer: {}", e))
236            })?;
237
238        let data = buffer.try_into_inner()?;
239
240        Ok(Some(Recording {
241            data,
242            spec: self.spec,
243        }))
244    }
245}
246
247impl Drop for RecordingHandle {
248    fn drop(&mut self) {
249        if self.buffer.is_some()
250            && let Err(e) = self.finish()
251        {
252            error!("failed to finalize recording: {}", e);
253        }
254    }
255}
256
257fn wav_spec_from_config(config: &cpal::SupportedStreamConfig) -> hound::WavSpec {
258    hound::WavSpec {
259        channels: config.channels(),
260        sample_rate: config.sample_rate().0,
261        bits_per_sample: (config.sample_format().sample_size() * 8) as _,
262        sample_format: sample_format(config.sample_format()),
263    }
264}
265
266fn sample_format(format: cpal::SampleFormat) -> hound::SampleFormat {
267    if format.is_float() {
268        hound::SampleFormat::Float
269    } else {
270        hound::SampleFormat::Int
271    }
272}
273
274fn write_data(
275    state: &mut RecordingState,
276    data: &[f32],
277    writer: &WavWriterHandle,
278    event_sender: &Option<Sender<AudioEvent>>,
279) {
280    if !state.mic_active {
281        if data.iter().any(|&sample| sample != 0.0) {
282            state.mic_active = true;
283            if let Some(sender) = event_sender {
284                sender.send(AudioEvent::StateChanged(MicState::Active)).ok();
285            }
286        } else {
287            return;
288        }
289    }
290
291    if let Ok(mut guard) = writer.try_lock()
292        && let Some(writer) = guard.as_mut()
293    {
294        for &sample in data.iter() {
295            writer.write_sample(sample).ok();
296        }
297    }
298}