use base64::Engine as _;
mod capture;
mod devices;
mod ffi;
mod speech;
pub use capture::{capture_microphone, capture_system_audio, validate_duration};
pub use devices::{check_microphone_permission, list_audio_devices, AudioDevice};
pub use speech::{speak, transcribe};
/// Maximum capture length, in seconds.
///
/// NOTE(review): presumably the limit checked by `validate_duration` and reported through
/// `AudioError::DurationExceeded` — confirm in the `capture` module.
pub const MAX_CAPTURE_SECS: f32 = 30.0;
/// Sample rate, in samples per second, that `AudioData::silent` uses for the buffers it creates.
pub(crate) const SAMPLE_RATE: u32 = 16_000;
/// Channel count that `AudioData::silent` assigns to the buffers it creates.
pub(crate) const CHANNELS: u16 = 1;
/// Bit depth that `encode_wav_pcm16` writes into the WAV header and uses to size each sample.
pub(crate) const BITS_PER_SAMPLE: u16 = 16;
/// Errors reported by the audio module.
///
/// The `#[error(...)]` attribute on each variant supplies its `Display` message;
/// `AudioError::code` maps each variant to a short identifier string.
#[derive(Debug, thiserror::Error)]
pub enum AudioError {
/// Microphone access was denied; the message tells the user where to enable it.
#[error(
"Microphone access denied. Enable it at System Settings > Privacy & Security > Microphone."
)]
PermissionDenied,
/// A requested duration (`requested`, seconds) was larger than the allowed maximum (`max`, seconds).
#[error("Duration {requested}s exceeds maximum allowed {max}s")]
DurationExceeded { requested: f32, max: f32 },
/// An audio framework call failed; the payload is interpolated into the message.
#[error("Audio framework error: {0}")]
Framework(String),
/// Transcription failed; the payload is interpolated into the message.
#[error("Transcription failed: {0}")]
Transcription(String),
/// Speech synthesis failed; the payload is interpolated into the message.
#[error("Speech synthesis failed: {0}")]
Synthesis(String),
}
impl AudioError {
#[must_use]
pub fn code(&self) -> &'static str {
match self {
Self::PermissionDenied => "microphone_denied",
Self::DurationExceeded { .. } => "duration_exceeded",
Self::Framework(_) => "framework_error",
Self::Transcription(_) => "transcription_error",
Self::Synthesis(_) => "synthesis_error",
}
}
}
/// A buffer of `f32` audio samples together with the format metadata describing it.
#[derive(Debug, Clone)]
pub struct AudioData {
/// Sample values. `to_wav_bytes` clamps each one to `[-1.0, 1.0]` when encoding.
/// NOTE(review): the layout for multi-channel data (presumably interleaved) is not
/// established in this file — confirm against the capture code.
pub samples: Vec<f32>,
/// Sample rate in samples per second; written into the WAV header by `to_wav_bytes`.
pub sample_rate: u32,
/// Number of channels; written into the WAV header by `to_wav_bytes`.
pub channels: u16,
/// Duration in seconds. Stored as supplied (`silent` copies its argument) rather than
/// derived from `samples`; `duration_ms` converts it to milliseconds.
pub duration_secs: f32,
}
impl AudioData {
    /// Builds a buffer of zero-valued samples for `duration_secs` seconds of audio.
    ///
    /// The sample count is `duration_secs * SAMPLE_RATE` with the fractional part
    /// dropped. The result uses `SAMPLE_RATE` and `CHANNELS`, and keeps
    /// `duration_secs` exactly as passed in.
    #[must_use]
    #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
    pub fn silent(duration_secs: f32) -> Self {
        // Widen to f64 before multiplying; the float-to-usize cast then drops the fraction.
        let exact_len = f64::from(duration_secs) * f64::from(SAMPLE_RATE);
        let samples = vec![0.0_f32; exact_len as usize];
        Self {
            duration_secs,
            channels: CHANNELS,
            sample_rate: SAMPLE_RATE,
            samples,
        }
    }

    /// Returns the WAV encoding of this buffer as a base64 string (`STANDARD` engine).
    #[must_use]
    pub fn to_wav_base64(&self) -> String {
        let wav = self.to_wav_bytes();
        let engine = base64::engine::general_purpose::STANDARD;
        engine.encode(&wav)
    }

    /// Returns this buffer encoded by `encode_wav_pcm16`, using the buffer's own
    /// sample rate and channel count.
    #[must_use]
    pub fn to_wav_bytes(&self) -> Vec<u8> {
        let Self {
            samples,
            sample_rate,
            channels,
            ..
        } = self;
        encode_wav_pcm16(samples, *sample_rate, *channels)
    }

    /// Returns `duration_secs` converted to whole milliseconds; any fractional
    /// millisecond is dropped by the float-to-integer cast.
    #[must_use]
    #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
    pub fn duration_ms(&self) -> u64 {
        let seconds = f64::from(self.duration_secs);
        (seconds * 1000.0) as u64
    }
}
pub(crate) fn encode_wav_pcm16(samples: &[f32], sample_rate: u32, channels: u16) -> Vec<u8> {
let bytes_per_sample: u16 = BITS_PER_SAMPLE / 8;
#[allow(clippy::cast_possible_truncation)]
let data_len = (samples.len() * bytes_per_sample as usize) as u32;
let fmt_chunk_size: u32 = 16;
let riff_size = 4 + (8 + fmt_chunk_size) + (8 + data_len);
let byte_rate = sample_rate * u32::from(channels) * u32::from(bytes_per_sample);
let block_align = channels * bytes_per_sample;
let mut out = Vec::with_capacity(44 + data_len as usize);
out.extend_from_slice(b"RIFF");
out.extend_from_slice(&riff_size.to_le_bytes());
out.extend_from_slice(b"WAVE");
out.extend_from_slice(b"fmt ");
out.extend_from_slice(&fmt_chunk_size.to_le_bytes());
out.extend_from_slice(&1u16.to_le_bytes()); out.extend_from_slice(&channels.to_le_bytes());
out.extend_from_slice(&sample_rate.to_le_bytes());
out.extend_from_slice(&byte_rate.to_le_bytes());
out.extend_from_slice(&block_align.to_le_bytes());
out.extend_from_slice(&BITS_PER_SAMPLE.to_le_bytes());
out.extend_from_slice(b"data");
out.extend_from_slice(&data_len.to_le_bytes());
for &s in samples {
#[allow(clippy::cast_possible_truncation)]
let pcm = (s.clamp(-1.0, 1.0) * 32767.0) as i16;
out.extend_from_slice(&pcm.to_le_bytes());
}
out
}
/// Unit tests for `AudioError`, `AudioData` and `encode_wav_pcm16`.
#[cfg(test)]
mod tests {
    use super::*;

    // --- AudioError::code: one test per variant -------------------------------

    #[test]
    fn audio_error_permission_denied_code() {
        assert_eq!(AudioError::PermissionDenied.code(), "microphone_denied");
    }

    #[test]
    fn audio_error_duration_exceeded_code() {
        let e = AudioError::DurationExceeded {
            requested: 60.0,
            max: 30.0,
        };
        assert_eq!(e.code(), "duration_exceeded");
    }

    #[test]
    fn audio_error_framework_code() {
        let e = AudioError::Framework("oops".to_string());
        assert_eq!(e.code(), "framework_error");
    }

    #[test]
    fn audio_error_transcription_code() {
        let e = AudioError::Transcription("failed".to_string());
        assert_eq!(e.code(), "transcription_error");
    }

    #[test]
    fn audio_error_synthesis_code() {
        let e = AudioError::Synthesis("failed".to_string());
        assert_eq!(e.code(), "synthesis_error");
    }

    #[test]
    fn audio_error_display_includes_message() {
        let e = AudioError::Framework("bad call".to_string());
        assert!(e.to_string().contains("bad call"));
    }

    // --- AudioData::silent and duration_ms ------------------------------------

    #[test]
    fn audio_data_silent_has_correct_sample_count() {
        let data = AudioData::silent(1.0);
        assert_eq!(data.samples.len(), SAMPLE_RATE as usize);
        assert_eq!(data.sample_rate, SAMPLE_RATE);
        assert_eq!(data.channels, CHANNELS);
    }

    #[test]
    fn audio_data_silent_all_samples_are_zero() {
        let data = AudioData::silent(0.5);
        assert!(data.samples.iter().all(|&s| s == 0.0));
    }

    #[test]
    fn audio_data_duration_ms_converts_correctly() {
        let data = AudioData::silent(1.5);
        assert_eq!(data.duration_ms(), 1500);
    }

    #[test]
    fn audio_data_duration_ms_rounds_down() {
        // 1.0009 s is about 1000.9 ms: truncation yields 1000, whereas rounding
        // to nearest would yield 1001, so this pins the round-down behavior.
        let data = AudioData::silent(1.0009);
        assert_eq!(data.duration_ms(), 1000);
    }

    // --- encode_wav_pcm16 header layout ----------------------------------------

    #[test]
    fn encode_wav_pcm16_minimum_header_is_44_bytes() {
        let bytes = encode_wav_pcm16(&[], SAMPLE_RATE, CHANNELS);
        assert_eq!(bytes.len(), 44);
    }

    #[test]
    fn encode_wav_pcm16_riff_magic() {
        let bytes = encode_wav_pcm16(&[], SAMPLE_RATE, CHANNELS);
        assert_eq!(&bytes[0..4], b"RIFF");
        assert_eq!(&bytes[8..12], b"WAVE");
        assert_eq!(&bytes[12..16], b"fmt ");
        assert_eq!(&bytes[36..40], b"data");
    }

    #[test]
    fn encode_wav_pcm16_data_length_matches_sample_count() {
        let samples: Vec<f32> = vec![0.5; 100];
        let bytes = encode_wav_pcm16(&samples, SAMPLE_RATE, CHANNELS);
        let data_len = u32::from_le_bytes(bytes[40..44].try_into().unwrap());
        // 100 samples at 2 bytes each, preceded by the 44-byte header.
        assert_eq!(data_len, 200);
        assert_eq!(bytes.len(), 244);
    }

    // --- AudioData WAV / base64 conversions ------------------------------------

    #[test]
    fn audio_data_to_wav_base64_starts_with_riff() {
        let data = AudioData::silent(0.0);
        let b64 = data.to_wav_base64();
        // "UklG" is the base64 encoding of the first three bytes of "RIFF".
        assert!(
            b64.starts_with("UklG"),
            "expected RIFF magic in base64: {b64}"
        );
    }

    #[test]
    fn audio_data_to_wav_base64_round_trips() {
        let data = AudioData::silent(0.0);
        let b64 = data.to_wav_base64();
        let decoded = base64::engine::general_purpose::STANDARD
            .decode(&b64)
            .unwrap();
        assert_eq!(&decoded[0..4], b"RIFF");
        // A real round trip: decoding must reproduce the WAV bytes exactly.
        assert_eq!(decoded, data.to_wav_bytes());
    }

    #[test]
    fn audio_data_to_wav_bytes_matches_direct_encoding() {
        let data = AudioData::silent(0.1);
        let via_method = data.to_wav_bytes();
        let direct = encode_wav_pcm16(&data.samples, data.sample_rate, data.channels);
        assert_eq!(via_method, direct);
    }
}