Skip to main content

voirs_evaluation/audio/
mod.rs

//! Audio I/O and format support for VoiRS evaluation.
//!
//! This module loads, processes, and converts the audio formats commonly used
//! in speech evaluation.
//!
//! ## Supported Formats
//!
//! - **WAV**: Uncompressed PCM audio (via `hound`)
//! - **FLAC**: Lossless audio compression (via `claxon`)
//! - **MP3**: MPEG Layer-3 audio (via `minimp3`)
//! - **OGG**: Ogg Vorbis audio (via `lewton`)
//! - **M4A**: AAC audio in MP4 container (via `mp4parse`)
//! - **AIFF**: Audio Interchange File Format
//!
//! ## Features
//!
//! - Automatic format detection
//! - Sample rate conversion with high-quality resampling
//! - Multi-channel to mono/stereo conversion
//! - Audio normalization and gain control
//! - Streaming audio support for real-time evaluation
//! - Memory-efficient chunked processing
//!
//! ## Examples
//!
//! ```rust
//! use voirs_evaluation::audio::LoadOptions;
//! use voirs_sdk::AudioBuffer;
//!
//! # #[tokio::main]
//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Create test audio buffer
//! let audio = AudioBuffer::new(vec![0.1; 16000], 16000, 1);
//!
//! // Demonstrate load options creation
//! let options = LoadOptions::new()
//!     .target_sample_rate(16000)
//!     .target_channels(1)
//!     .normalize(true);
//!
//! println!("Audio format options configured for {:?} Hz", options.target_sample_rate);
//! # Ok(())
//! # }
//! ```
45
46use crate::EvaluationError;
47use std::path::Path;
48use voirs_sdk::AudioBuffer;
49
50pub mod auto_conversion;
51pub mod conversion;
52pub mod formats;
53pub mod loader;
54pub mod streaming;
55pub mod validation;
56
57// Re-export key types
58pub use auto_conversion::*;
59pub use conversion::*;
60pub use formats::*;
61pub use loader::*;
62pub use streaming::*;
63pub use validation::*;
64
/// Audio container/codec families this module can tell apart.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudioFormat {
    /// WAV: uncompressed PCM
    Wav,
    /// FLAC: lossless compression
    Flac,
    /// MP3: lossy compression
    Mp3,
    /// OGG Vorbis: lossy compression
    Ogg,
    /// M4A: AAC inside an MP4 container
    M4a,
    /// AIFF: uncompressed PCM
    Aiff,
    /// Anything that is not recognised or not supported
    Unknown,
}

impl AudioFormat {
    /// Guess the format of `path` from its file extension.
    ///
    /// The extension is lowercased before comparison, so `clip.WAV` and
    /// `clip.wav` yield the same answer. Returns [`AudioFormat::Unknown`] when
    /// the path has no extension, the extension is not valid UTF-8, or the
    /// extension is not listed by any format's [`extensions`](Self::extensions).
    pub fn from_extension(path: &Path) -> Self {
        // Every concrete format; `Unknown` owns no extensions and is the fallback.
        const CANDIDATES: [AudioFormat; 6] = [
            AudioFormat::Wav,
            AudioFormat::Flac,
            AudioFormat::Mp3,
            AudioFormat::Ogg,
            AudioFormat::M4a,
            AudioFormat::Aiff,
        ];

        let lowered = match path.extension().and_then(|raw| raw.to_str()) {
            Some(text) => text.to_lowercase(),
            None => return Self::Unknown,
        };

        CANDIDATES
            .iter()
            .copied()
            .find(|format| format.extensions().iter().any(|known| *known == lowered))
            .unwrap_or(Self::Unknown)
    }

    /// Lowercase file extensions (without the leading dot) associated with
    /// this format. [`AudioFormat::Unknown`] has none.
    pub fn extensions(&self) -> &'static [&'static str] {
        match *self {
            AudioFormat::Unknown => &[],
            AudioFormat::Wav => &["wav", "wave"],
            AudioFormat::Flac => &["flac"],
            AudioFormat::Mp3 => &["mp3"],
            AudioFormat::Ogg => &["ogg"],
            AudioFormat::M4a => &["m4a", "aac"],
            AudioFormat::Aiff => &["aiff", "aif"],
        }
    }

    /// Whether this format is treated as metadata-capable.
    ///
    /// True for FLAC, MP3, OGG and M4A; false for everything else.
    pub fn supports_metadata(&self) -> bool {
        match self {
            Self::Flac | Self::Mp3 | Self::Ogg | Self::M4a => true,
            Self::Wav | Self::Aiff | Self::Unknown => false,
        }
    }

    /// Whether this format stores audio losslessly.
    ///
    /// True for WAV, FLAC and AIFF; false for everything else.
    pub fn is_lossless(&self) -> bool {
        match self {
            Self::Wav | Self::Flac | Self::Aiff => true,
            Self::Mp3 | Self::Ogg | Self::M4a | Self::Unknown => false,
        }
    }
}
125
/// Options controlling how an audio file is loaded and post-processed.
///
/// Build one with [`LoadOptions::new`] (or [`Default::default`]) and chain the
/// setter methods. Each setter takes the options by value and returns the
/// updated value, so the result must be kept:
///
/// ```ignore
/// let options = LoadOptions::new().target_sample_rate(16000).normalize(true);
/// ```
#[derive(Debug, Clone)]
pub struct LoadOptions {
    /// Target sample rate (None = keep original)
    pub target_sample_rate: Option<u32>,
    /// Target number of channels (None = keep original)
    pub target_channels: Option<u32>,
    /// Normalize audio to [-1.0, 1.0] range
    pub normalize: bool,
    /// Apply DC offset removal
    pub remove_dc_offset: bool,
    /// Start offset in seconds (None = not set)
    pub start_offset: Option<f64>,
    /// Duration to load in seconds (None = load all)
    pub duration: Option<f64>,
    /// Quality level for sample rate conversion (0-10)
    pub resample_quality: u8,
}

impl Default for LoadOptions {
    /// Defaults: keep the original sample rate and channel count, no
    /// normalization, DC offset removal enabled, no offset or duration limit,
    /// and resampling quality 7.
    fn default() -> Self {
        Self {
            target_sample_rate: None,
            target_channels: None,
            normalize: false,
            remove_dc_offset: true,
            start_offset: None,
            duration: None,
            resample_quality: 7, // High quality resampling
        }
    }
}

impl LoadOptions {
    /// Create new loading options with defaults (same as [`Default::default`]).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the target sample rate.
    #[must_use = "builder methods return the updated options instead of mutating in place"]
    pub fn target_sample_rate(mut self, sample_rate: u32) -> Self {
        self.target_sample_rate = Some(sample_rate);
        self
    }

    /// Set the target number of channels.
    #[must_use = "builder methods return the updated options instead of mutating in place"]
    pub fn target_channels(mut self, channels: u32) -> Self {
        self.target_channels = Some(channels);
        self
    }

    /// Enable or disable normalization.
    #[must_use = "builder methods return the updated options instead of mutating in place"]
    pub fn normalize(mut self, normalize: bool) -> Self {
        self.normalize = normalize;
        self
    }

    /// Enable or disable DC offset removal.
    #[must_use = "builder methods return the updated options instead of mutating in place"]
    pub fn remove_dc_offset(mut self, remove_dc: bool) -> Self {
        self.remove_dc_offset = remove_dc;
        self
    }

    /// Set the start offset in seconds.
    ///
    /// The value is stored as given; no range check is performed here.
    #[must_use = "builder methods return the updated options instead of mutating in place"]
    pub fn start_offset(mut self, offset: f64) -> Self {
        self.start_offset = Some(offset);
        self
    }

    /// Set the duration to load in seconds.
    ///
    /// The value is stored as given; no range check is performed here.
    #[must_use = "builder methods return the updated options instead of mutating in place"]
    pub fn duration(mut self, duration: f64) -> Self {
        self.duration = Some(duration);
        self
    }

    /// Set the resampling quality (0-10, higher is better).
    ///
    /// Values above 10 are clamped to 10.
    #[must_use = "builder methods return the updated options instead of mutating in place"]
    pub fn resample_quality(mut self, quality: u8) -> Self {
        self.resample_quality = quality.min(10);
        self
    }
}
207
/// Descriptive metadata that may accompany an audio file.
///
/// Every field is optional; `Default` yields a value with all fields `None`.
#[derive(Debug, Clone, Default)]
pub struct AudioMetadata {
    /// Title, if present
    pub title: Option<String>,
    /// Artist name, if present
    pub artist: Option<String>,
    /// Album name, if present
    pub album: Option<String>,
    /// Track number, if present
    pub track: Option<u32>,
    /// Release year, if present
    pub year: Option<u32>,
    /// Genre, if present
    pub genre: Option<String>,
    /// Duration in seconds, if known
    pub duration: Option<f64>,
    /// Bit rate (for compressed formats), if known
    pub bitrate: Option<u32>,
}
228
229/// Error types specific to audio I/O operations
230#[derive(Debug, thiserror::Error)]
231pub enum AudioIoError {
232    /// Unsupported audio format
233    #[error("Unsupported audio format: {format:?}")]
234    UnsupportedFormat {
235        /// Audio format
236        format: AudioFormat,
237    },
238
239    /// File I/O error
240    #[error("File I/O error: {message}")]
241    IoError {
242        /// Error message
243        message: String,
244        /// Source error
245        #[source]
246        source: Option<Box<dyn std::error::Error + Send + Sync>>,
247    },
248
249    /// Audio decoding error
250    #[error("Audio decoding error: {message}")]
251    DecodingError {
252        /// Error message
253        message: String,
254        /// Source error
255        #[source]
256        source: Option<Box<dyn std::error::Error + Send + Sync>>,
257    },
258
259    /// Invalid audio parameters
260    #[error("Invalid audio parameters: {message}")]
261    InvalidParameters {
262        /// Error message
263        message: String,
264    },
265
266    /// Conversion error
267    #[error("Audio conversion error: {message}")]
268    ConversionError {
269        /// Error message
270        message: String,
271    },
272}
273
274impl From<AudioIoError> for EvaluationError {
275    fn from(err: AudioIoError) -> Self {
276        EvaluationError::AudioProcessingError {
277            message: err.to_string(),
278            source: Some(Box::new(err)),
279        }
280    }
281}
282
283/// Result type for audio I/O operations
284pub type AudioIoResult<T> = Result<T, AudioIoError>;
285
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: run extension-based detection on a path given as text.
    fn detect(name: &str) -> AudioFormat {
        AudioFormat::from_extension(Path::new(name))
    }

    #[test]
    fn test_format_detection() {
        assert_eq!(detect("test.wav"), AudioFormat::Wav);
        assert_eq!(detect("test.wave"), AudioFormat::Wav);
        assert_eq!(detect("test.flac"), AudioFormat::Flac);
        assert_eq!(detect("test.mp3"), AudioFormat::Mp3);
        assert_eq!(detect("test.ogg"), AudioFormat::Ogg);
        assert_eq!(detect("test.m4a"), AudioFormat::M4a);
        assert_eq!(detect("test.aac"), AudioFormat::M4a);
        assert_eq!(detect("test.aiff"), AudioFormat::Aiff);
        assert_eq!(detect("test.aif"), AudioFormat::Aiff);
        assert_eq!(detect("test.xyz"), AudioFormat::Unknown);
    }

    #[test]
    fn test_format_detection_is_case_insensitive() {
        assert_eq!(detect("TEST.WAV"), AudioFormat::Wav);
        assert_eq!(detect("test.FlAc"), AudioFormat::Flac);
        assert_eq!(detect("test.Mp3"), AudioFormat::Mp3);
    }

    #[test]
    fn test_format_detection_without_extension() {
        assert_eq!(detect("test"), AudioFormat::Unknown);
        assert_eq!(detect(""), AudioFormat::Unknown);
        // Only the final path component is inspected for an extension.
        assert_eq!(detect("dir.wav/test"), AudioFormat::Unknown);
    }

    #[test]
    fn test_extensions_round_trip() {
        let formats = [
            AudioFormat::Wav,
            AudioFormat::Flac,
            AudioFormat::Mp3,
            AudioFormat::Ogg,
            AudioFormat::M4a,
            AudioFormat::Aiff,
        ];
        for format in formats.iter().copied() {
            assert!(!format.extensions().is_empty());
            for ext in format.extensions() {
                assert_eq!(detect(&format!("clip.{}", ext)), format);
            }
        }
        assert!(AudioFormat::Unknown.extensions().is_empty());
    }

    #[test]
    fn test_format_properties() {
        assert!(AudioFormat::Wav.is_lossless());
        assert!(AudioFormat::Flac.is_lossless());
        assert!(AudioFormat::Aiff.is_lossless());
        assert!(!AudioFormat::Mp3.is_lossless());
        assert!(!AudioFormat::Ogg.is_lossless());
        assert!(!AudioFormat::M4a.is_lossless());
        assert!(!AudioFormat::Unknown.is_lossless());

        assert!(!AudioFormat::Wav.supports_metadata());
        assert!(!AudioFormat::Aiff.supports_metadata());
        assert!(!AudioFormat::Unknown.supports_metadata());
        assert!(AudioFormat::Flac.supports_metadata());
        assert!(AudioFormat::Mp3.supports_metadata());
        assert!(AudioFormat::Ogg.supports_metadata());
        assert!(AudioFormat::M4a.supports_metadata());
    }

    #[test]
    fn test_load_options() {
        let options = LoadOptions::new()
            .target_sample_rate(16000)
            .target_channels(1)
            .normalize(true);

        assert_eq!(options.target_sample_rate, Some(16000));
        assert_eq!(options.target_channels, Some(1));
        assert!(options.normalize);
    }

    #[test]
    fn test_load_options_remaining_setters() {
        let options = LoadOptions::new()
            .remove_dc_offset(false)
            .start_offset(1.5)
            .duration(2.0)
            .resample_quality(3);

        assert!(!options.remove_dc_offset);
        assert_eq!(options.start_offset, Some(1.5));
        assert_eq!(options.duration, Some(2.0));
        assert_eq!(options.resample_quality, 3);
    }

    #[test]
    fn test_resample_quality_is_clamped() {
        assert_eq!(LoadOptions::new().resample_quality(10).resample_quality, 10);
        assert_eq!(LoadOptions::new().resample_quality(11).resample_quality, 10);
        assert_eq!(LoadOptions::new().resample_quality(u8::MAX).resample_quality, 10);
    }

    #[test]
    fn test_load_options_defaults() {
        let options = LoadOptions::default();
        assert_eq!(options.target_sample_rate, None);
        assert_eq!(options.target_channels, None);
        assert!(!options.normalize);
        assert!(options.remove_dc_offset);
        assert_eq!(options.start_offset, None);
        assert_eq!(options.duration, None);
        assert_eq!(options.resample_quality, 7);
    }
}