Skip to main content

oximedia_audio_analysis/
lib.rs

1//! Advanced audio analysis and forensics for `OxiMedia`.
2//!
3//! This crate provides comprehensive audio analysis capabilities for professional
4//! audio applications including forensics, voice analysis, music analysis, and more.
5//!
6//! # Features
7//!
8//! ## Spectral Analysis
9//! - Advanced frequency-domain analysis with multiple window functions
10//! - Spectral centroid, flatness, crest factor, and bandwidth computation
11//! - High-resolution spectral features for detailed audio characterization
12//!
13//! ## Voice Analysis
14//! - Voice characteristic analysis (F0, formants, jitter, shimmer, HNR)
15//! - Gender detection using formant analysis and F0 range
16//! - Age estimation based on voice characteristics
17//! - Emotion detection (anger, joy, sadness, neutral)
18//! - Speaker identification and verification
19//!
20//! ## Music Analysis
21//! - Harmonic analysis and chord progression detection
22//! - Advanced rhythmic analysis extending MIR capabilities
23//! - Timbral analysis for sound quality characterization
24//! - Instrument identification using spectral and temporal features
25//!
26//! ## Source Separation
27//! - Vocal/instrumental separation using harmonic-percussive decomposition
28//! - Drum track isolation
29//! - Bass line extraction
30//! - Multi-source separation
31//!
32//! ## Echo and Reverb Analysis
33//! - Echo and reverb detection
34//! - Room acoustics analysis
35//! - RT60 reverberation time measurement
36//! - Early reflection pattern analysis
37//!
38//! ## Distortion Analysis
39//! - Distortion detection and quantification
40//! - Total Harmonic Distortion (THD) measurement
41//! - Clipping detection with threshold analysis
42//! - Non-linear distortion characterization
43//!
44//! ## Dynamic Range Analysis
45//! - Detailed dynamic range computation
46//! - Crest factor analysis
47//! - RMS level tracking over time
48//! - Loudness variation measurement
49//!
50//! ## Transient Detection
51//! - Transient and attack detection
52//! - Envelope analysis with ADSR characterization
53//! - Onset strength function computation
54//!
55//! ## Pitch Analysis
56//! - Pitch tracking using YIN algorithm (patent-free)
57//! - Pitch contour analysis
58//! - Vibrato detection and measurement
59//! - F0 estimation with confidence scoring
60//!
61//! ## Formant Analysis
62//! - Formant frequency analysis (F1-F4)
63//! - Formant tracking over time
64//! - Vowel detection and classification
65//! - Linear Predictive Coding (LPC) for formant extraction
66//!
67//! ## Audio Forensics
68//! - Audio authenticity verification
69//! - Edit detection (cuts, splices, insertions)
70//! - Compression history analysis
71//! - Background noise consistency analysis
72//! - ENF (Electrical Network Frequency) analysis
73//!
74//! ## Noise Analysis
75//! - Noise profiling and characterization
76//! - Noise type classification (white, pink, environmental)
77//! - Signal-to-noise ratio (SNR) computation
78//! - Noise floor estimation
79//!
80//! # Usage Example
81//!
82//! ```rust
83//! use oximedia_audio_analysis::{
84//!     AudioAnalyzer, AnalysisConfig,
85//! };
86//!
87//! // Create analyzer with default configuration
88//! let config = AnalysisConfig::default();
89//! let analyzer = AudioAnalyzer::new(config);
90//!
91//! // Analyze audio samples
92//! let samples = vec![0.0_f32; 44100]; // 1 second of audio
93//! let sample_rate = 44100.0;
94//!
95//! let result = analyzer.analyze(&samples, sample_rate)?;
96//!
97//! // Access spectral features
98//! println!("Spectral centroid: {:.1} Hz", result.spectral.centroid);
99//! println!("Spectral flatness: {:.3}", result.spectral.flatness);
100//!
101//! // Access voice characteristics
102//! if let Some(voice) = result.voice {
103//!     println!("F0: {:.1} Hz", voice.f0);
104//!     println!("Gender: {:?}", voice.gender);
105//! }
106//!
107//! # Ok::<(), oximedia_audio_analysis::AnalysisError>(())
108//! ```
109//!
110//! # Patent-Free Implementation
111//!
112//! All algorithms are implemented using patent-free methods:
113//! - YIN algorithm for pitch detection
114//! - LPC for formant analysis
115//! - Harmonic-percussive separation for source separation
116//! - Autocorrelation-based methods
117//!
118//! # Real-Time Capable
119//!
120//! Most analysis modules support frame-by-frame processing for real-time applications.
121
122#![forbid(unsafe_code)]
123#![warn(missing_docs)]
124#![allow(clippy::cast_precision_loss)]
125#![allow(clippy::cast_possible_truncation)]
126#![allow(clippy::cast_sign_loss)]
127#![allow(clippy::similar_names)]
128#![allow(clippy::many_single_char_names)]
129#![allow(clippy::module_name_repetitions)]
130#![allow(clippy::too_many_arguments)]
131#![allow(clippy::too_many_lines)]
132#![allow(clippy::float_cmp)]
133#![allow(clippy::struct_excessive_bools)]
134#![allow(dead_code, clippy::missing_errors_doc, clippy::missing_panics_doc)]
135
136pub mod beat;
137pub mod cepstral;
138pub mod chroma;
139pub mod compression_analysis;
140pub mod distortion;
141pub mod dynamics;
142pub mod echo;
143pub mod energy;
144pub mod energy_contour;
145pub mod forensics;
146pub mod formant;
147pub mod formant_track;
148pub mod harmony;
149pub mod loudness;
150pub mod loudness_curve;
151pub mod loudness_range;
152/// Mel spectrogram computation for ML-oriented audio feature extraction.
153pub mod mel_spectrogram;
154pub mod music;
155pub mod noise;
156pub mod onset;
157pub mod pitch;
158pub mod pitch_detect;
159pub mod pitch_tracker;
160pub mod psychoacoustic;
161pub mod rhythm;
162/// Audio scene classification: Indoor, Outdoor, Quiet, Noisy, Speech, Music, Mixed.
163pub mod scene_classify;
164pub mod separate;
165pub mod silence_detect;
166pub mod spectral;
167pub mod spectral_contrast;
168pub mod spectral_features;
169pub mod spectral_flux;
170pub mod stereo_field;
171pub mod tempo_analysis;
172pub mod timbre;
173pub mod transient;
174pub mod voice;
175
176use thiserror::Error;
177
/// Errors that can occur during audio analysis.
///
/// The `Display` text of every variant is defined by its `#[error(...)]`
/// attribute; string payloads are interpolated into that text verbatim.
#[derive(Error, Debug, Clone)]
pub enum AnalysisError {
    /// Invalid sample rate; carries the rejected value.
    ///
    /// `AudioAnalyzer::analyze` returns this when the rate lies outside
    /// `8_000.0..=192_000.0` Hz.
    #[error("Invalid sample rate: {0}")]
    InvalidSampleRate(f32),

    /// Insufficient samples for analysis.
    ///
    /// `AudioAnalyzer::analyze` returns this when fewer than
    /// `AnalysisConfig::fft_size` samples are supplied.
    #[error("Insufficient samples: need at least {needed}, got {got}")]
    InsufficientSamples {
        /// Required number of samples
        needed: usize,
        /// Actual number of samples
        got: usize,
    },

    /// Invalid configuration parameter; carries a description of the problem.
    #[error("Invalid configuration: {0}")]
    InvalidConfig(String),

    /// Analysis failed; carries a description of the failure.
    #[error("Analysis failed: {0}")]
    AnalysisFailed(String),

    /// FFT error; carries a description of the failure.
    #[error("FFT error: {0}")]
    FftError(String),

    /// Invalid input data; carries a description of what was wrong.
    #[error("Invalid input: {0}")]
    InvalidInput(String),

    /// Feature extraction failed; carries a description of the failure.
    #[error("Feature extraction failed: {0}")]
    FeatureExtractionFailed(String),
}
214
/// Result type for audio analysis operations.
///
/// Shorthand for `std::result::Result` with the error fixed to [`AnalysisError`].
pub type Result<T> = std::result::Result<T, AnalysisError>;
217
/// Configuration for audio analysis.
///
/// `AudioAnalyzer::new` clones this value into each of its sub-analyzers.
/// The values quoted as defaults below are those produced by
/// `AnalysisConfig::default()`.
#[derive(Debug, Clone)]
pub struct AnalysisConfig {
    /// FFT size for frequency analysis (default `2048`).
    ///
    /// `AudioAnalyzer::analyze` also uses this as the minimum number of input
    /// samples it accepts.
    pub fft_size: usize,
    /// Hop size for frame-based analysis (default `512`)
    pub hop_size: usize,
    /// Window function type (default [`WindowType::Hann`])
    pub window_type: WindowType,
    /// Minimum frequency for analysis (Hz, default `20.0`)
    pub min_frequency: f32,
    /// Maximum frequency for analysis (Hz, default `20000.0`)
    pub max_frequency: f32,
    /// Enable detailed analysis (slower but more accurate); default `false`
    pub detailed: bool,
}
234
235impl Default for AnalysisConfig {
236    fn default() -> Self {
237        Self {
238            fft_size: 2048,
239            hop_size: 512,
240            window_type: WindowType::Hann,
241            min_frequency: 20.0,
242            max_frequency: 20000.0,
243            detailed: false,
244        }
245    }
246}
247
/// Window function types for spectral analysis.
///
/// Passed to `generate_window` to obtain the window coefficients.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WindowType {
    /// Hann window (raised cosine, good general purpose)
    Hann,
    /// Hamming window (raised cosine with 0.54 / 0.46 coefficients)
    Hamming,
    /// Blackman window (lower sidelobes than Hann, at the cost of a wider main lobe)
    Blackman,
    /// Blackman-Harris window (four-term cosine sum, very low sidelobes)
    BlackmanHarris,
    /// Rectangular window (no windowing; every coefficient is 1.0)
    Rectangular,
}
262
/// Main audio analyzer that coordinates all analysis modules.
///
/// Built with `AudioAnalyzer::new`, which gives every sub-analyzer its own
/// clone of the supplied configuration.
pub struct AudioAnalyzer {
    // Configuration the analyzer was built with; `analyze` reads `fft_size`
    // from it for the minimum-length check.
    config: AnalysisConfig,
    // Used by both `analyze` and `analyze_frame`.
    spectral_analyzer: spectral::SpectralAnalyzer,
    // Only invoked by `analyze` when the pitch result looks voiced.
    voice_analyzer: voice::VoiceAnalyzer,
    // Used by both `analyze` and `analyze_frame`.
    pitch_tracker: pitch::PitchTracker,
    // Used by `analyze` only.
    formant_analyzer: formant::FormantAnalyzer,
    // Used by `analyze` only.
    dynamics_analyzer: dynamics::DynamicsAnalyzer,
    // Used by `analyze` only.
    transient_detector: transient::TransientDetector,
}
273
274impl AudioAnalyzer {
275    /// Create a new audio analyzer with the given configuration.
276    #[must_use]
277    pub fn new(config: AnalysisConfig) -> Self {
278        Self {
279            spectral_analyzer: spectral::SpectralAnalyzer::new(config.clone()),
280            voice_analyzer: voice::VoiceAnalyzer::new(config.clone()),
281            pitch_tracker: pitch::PitchTracker::new(config.clone()),
282            formant_analyzer: formant::FormantAnalyzer::new(config.clone()),
283            dynamics_analyzer: dynamics::DynamicsAnalyzer::new(config.clone()),
284            transient_detector: transient::TransientDetector::new(config.clone()),
285            config,
286        }
287    }
288
289    /// Perform comprehensive audio analysis on the given samples.
290    ///
291    /// # Arguments
292    /// * `samples` - Audio samples (mono or interleaved stereo)
293    /// * `sample_rate` - Sample rate in Hz
294    ///
295    /// # Returns
296    /// Complete analysis results including spectral, temporal, and high-level features.
297    pub fn analyze(&self, samples: &[f32], sample_rate: f32) -> Result<AnalysisResult> {
298        if !(8_000.0..=192_000.0).contains(&sample_rate) {
299            return Err(AnalysisError::InvalidSampleRate(sample_rate));
300        }
301
302        if samples.len() < self.config.fft_size {
303            return Err(AnalysisError::InsufficientSamples {
304                needed: self.config.fft_size,
305                got: samples.len(),
306            });
307        }
308
309        // Perform all analyses
310        let spectral = self.spectral_analyzer.analyze(samples, sample_rate)?;
311        let pitch_result = self.pitch_tracker.track(samples, sample_rate)?;
312        let formants = self.formant_analyzer.analyze(samples, sample_rate)?;
313        let dynamics = self.dynamics_analyzer.analyze(samples, sample_rate)?;
314        let transients = self.transient_detector.detect(samples, sample_rate)?;
315
316        // Voice analysis (optional, depends on pitch detection)
317        let voice = if pitch_result.mean_f0 > 0.0 && pitch_result.voicing_rate > 0.5 {
318            Some(self.voice_analyzer.analyze(samples, sample_rate)?)
319        } else {
320            None
321        };
322
323        Ok(AnalysisResult {
324            spectral,
325            pitch: pitch_result,
326            formants,
327            dynamics,
328            transients,
329            voice,
330        })
331    }
332
333    /// Analyze audio in real-time, frame by frame.
334    pub fn analyze_frame(&mut self, samples: &[f32], sample_rate: f32) -> Result<FrameAnalysis> {
335        let spectral = self.spectral_analyzer.analyze_frame(samples, sample_rate)?;
336        let pitch = self.pitch_tracker.track_frame(samples, sample_rate)?;
337        let rms = compute_rms(samples);
338
339        Ok(FrameAnalysis {
340            spectral,
341            pitch,
342            rms,
343        })
344    }
345}
346
/// Complete analysis result.
///
/// Produced by `AudioAnalyzer::analyze`; each field holds the output of one
/// sub-analyzer.
#[derive(Debug, Clone)]
pub struct AnalysisResult {
    /// Spectral analysis results
    pub spectral: spectral::SpectralFeatures,
    /// Pitch tracking results
    pub pitch: pitch::PitchResult,
    /// Formant analysis results
    pub formants: formant::FormantResult,
    /// Dynamic range analysis results
    pub dynamics: dynamics::DynamicsResult,
    /// Transient detection results
    pub transients: transient::TransientResult,
    /// Voice analysis results (optional).
    ///
    /// `AudioAnalyzer::analyze` fills this only when the pitch result has
    /// `mean_f0 > 0.0` and `voicing_rate > 0.5`; otherwise it is `None`.
    pub voice: Option<voice::VoiceCharacteristics>,
}
363
/// Frame-level analysis result for real-time processing.
///
/// Produced by `AudioAnalyzer::analyze_frame` for a single frame of samples.
#[derive(Debug, Clone)]
pub struct FrameAnalysis {
    /// Spectral features
    pub spectral: spectral::SpectralFeatures,
    /// Pitch estimate
    pub pitch: pitch::PitchEstimate,
    /// RMS level of the frame, as returned by `compute_rms`
    pub rms: f32,
}
374
375/// Generate window function of the specified type and size.
376#[must_use]
377pub fn generate_window(window_type: WindowType, size: usize) -> Vec<f32> {
378    match window_type {
379        WindowType::Hann => hann_window(size),
380        WindowType::Hamming => hamming_window(size),
381        WindowType::Blackman => blackman_window(size),
382        WindowType::BlackmanHarris => blackman_harris_window(size),
383        WindowType::Rectangular => vec![1.0; size],
384    }
385}
386
/// Build a symmetric Hann window of `size` samples.
///
/// Sample `i` is `0.5 * (1 - cos(2π·i / (size - 1)))`, so the window starts and
/// ends at zero and reaches one in the middle.
///
/// Degenerate sizes are handled explicitly: `0` yields an empty vector and `1`
/// yields `[1.0]` (the formula would otherwise evaluate `0 / 0`).
fn hann_window(size: usize) -> Vec<f32> {
    if size == 1 {
        return vec![1.0];
    }

    // `saturating_sub` keeps `size == 0` from underflowing; the range is empty then.
    let last_index = size.saturating_sub(1) as f32;
    (0..size)
        .map(|i| {
            // One full cosine period (2π) must span the window. With only π the
            // result is a monotonic 0 -> 1 ramp instead of a symmetric taper.
            let phase = 2.0 * std::f32::consts::PI * i as f32 / last_index;
            0.5 * (1.0 - phase.cos())
        })
        .collect()
}
395
/// Build a symmetric Hamming window of `size` samples.
///
/// Sample `i` is `0.54 - 0.46 * cos(2π·i / (size - 1))`.
///
/// Degenerate sizes are handled explicitly: `0` yields an empty vector and `1`
/// yields `[1.0]` (the formula would otherwise evaluate `0 / 0` and return NaN).
fn hamming_window(size: usize) -> Vec<f32> {
    if size == 1 {
        return vec![1.0];
    }

    // `saturating_sub` keeps `size == 0` from underflowing; the range is empty then.
    let last_index = size.saturating_sub(1) as f32;
    (0..size)
        .map(|i| {
            let phase = 2.0 * std::f32::consts::PI * i as f32 / last_index;
            0.54 - 0.46 * phase.cos()
        })
        .collect()
}
404
/// Build a symmetric Blackman window of `size` samples.
///
/// Sample `i` is `0.42 - 0.5 * cos(x) + 0.08 * cos(2x)` with
/// `x = 2π·i / (size - 1)`.
///
/// Degenerate sizes are handled explicitly: `0` yields an empty vector and `1`
/// yields `[1.0]` (the formula would otherwise evaluate `0 / 0` and return NaN).
fn blackman_window(size: usize) -> Vec<f32> {
    if size == 1 {
        return vec![1.0];
    }

    // `saturating_sub` keeps `size == 0` from underflowing; the range is empty then.
    let last_index = size.saturating_sub(1) as f32;
    (0..size)
        .map(|i| {
            let phase = 2.0 * std::f32::consts::PI * i as f32 / last_index;
            0.42 - 0.5 * phase.cos() + 0.08 * (2.0 * phase).cos()
        })
        .collect()
}
413
/// Build a symmetric four-term Blackman-Harris window of `size` samples.
///
/// Sample `i` is
/// `0.35875 - 0.48829 * cos(x) + 0.14128 * cos(2x) - 0.01168 * cos(3x)` with
/// `x = 2π·i / (size - 1)`.
///
/// Degenerate sizes are handled explicitly: `0` yields an empty vector and `1`
/// yields `[1.0]` (the formula would otherwise evaluate `0 / 0` and return NaN).
fn blackman_harris_window(size: usize) -> Vec<f32> {
    if size == 1 {
        return vec![1.0];
    }

    // `saturating_sub` keeps `size == 0` from underflowing; the range is empty then.
    let last_index = size.saturating_sub(1) as f32;
    (0..size)
        .map(|i| {
            let phase = 2.0 * std::f32::consts::PI * i as f32 / last_index;
            0.35875 - 0.48829 * phase.cos() + 0.14128 * (2.0 * phase).cos()
                - 0.01168 * (3.0 * phase).cos()
        })
        .collect()
}
422
/// Compute RMS (Root Mean Square) level of audio samples.
///
/// Returns `0.0` for an empty slice. The squares are accumulated in `f64` so
/// that long buffers do not lose precision and large finite samples do not
/// overflow the running sum; the result is narrowed back to `f32` at the end.
#[must_use]
pub fn compute_rms(samples: &[f32]) -> f32 {
    if samples.is_empty() {
        return 0.0;
    }

    let sum_squares: f64 = samples
        .iter()
        .map(|&x| {
            let wide = f64::from(x);
            wide * wide
        })
        .sum();
    (sum_squares / samples.len() as f64).sqrt() as f32
}
433
/// Compute the zero-crossing rate of `samples`.
///
/// The rate is the number of adjacent sample pairs whose sign differs divided
/// by the number of adjacent pairs (`len - 1`). Zero counts as non-negative.
/// Slices with fewer than two samples yield `0.0`.
#[must_use]
pub fn zero_crossing_rate(samples: &[f32]) -> f32 {
    let pair_count = match samples.len() {
        0 | 1 => return 0.0,
        len => len - 1,
    };

    let crossings = samples
        .windows(2)
        .filter(|pair| {
            let (prev, cur) = (pair[0], pair[1]);
            // Written as explicit comparisons so that a NaN on either side
            // never counts as a crossing.
            if cur >= 0.0 {
                prev < 0.0
            } else if cur < 0.0 {
                prev >= 0.0
            } else {
                false
            }
        })
        .count();

    crossings as f32 / pair_count as f32
}
452
/// Convert amplitude to decibels (`20 * log10(amplitude)`), floored at -100 dB.
///
/// Zero, negative, and very small positive amplitudes all map to `-100.0`, so
/// the result never drops below the floor and never decreases as `amplitude`
/// grows. A NaN input yields NaN.
#[must_use]
pub fn amplitude_to_db(amplitude: f32) -> f32 {
    const FLOOR_DB: f32 = -100.0;

    if amplitude <= 0.0 {
        return FLOOR_DB;
    }

    let db = 20.0 * amplitude.log10();
    // Explicit comparison instead of `f32::max` so that NaN keeps propagating.
    if db < FLOOR_DB {
        FLOOR_DB
    } else {
        db
    }
}
462
/// Convert a level in decibels to a linear amplitude (`10^(db / 20)`).
#[must_use]
pub fn db_to_amplitude(db: f32) -> f32 {
    let exponent = db / 20.0;
    f32::powf(10.0, exponent)
}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// A generated Hann window has the requested length, starts near zero and
    /// reaches a value close to one somewhere.
    #[test]
    fn test_window_generation() {
        let size = 1024;
        let window = generate_window(WindowType::Hann, size);

        assert_eq!(window.len(), size);
        // First coefficient is near zero.
        assert!(window[0] < 0.01);

        // The largest coefficient should be close to 1.
        let peak = window.iter().fold(0.0_f32, |acc, &v| acc.max(v));
        assert!(peak > 0.9);
    }

    /// RMS of a full-scale alternating signal is 1; RMS of silence is 0.
    #[test]
    fn test_rms_computation() {
        let alternating = [1.0_f32, -1.0, 1.0, -1.0];
        let rms = compute_rms(&alternating);
        assert!((rms - 1.0).abs() < 1e-6);

        let silence = [0.0_f32; 100];
        assert_eq!(compute_rms(&silence), 0.0);
    }

    /// Every transition of an alternating signal is a crossing; a constant
    /// signal has none.
    #[test]
    fn test_zero_crossing_rate() {
        let alternating = [1.0_f32, -1.0, 1.0, -1.0, 1.0];
        let zcr = zero_crossing_rate(&alternating);
        assert!((zcr - 1.0).abs() < 1e-6);

        let constant = [1.0_f32; 10];
        assert_eq!(zero_crossing_rate(&constant), 0.0);
    }

    /// Amplitude -> dB -> amplitude round-trips, and the fixed points hold.
    #[test]
    fn test_db_conversion() {
        let amp = 0.5;
        let round_trip = db_to_amplitude(amplitude_to_db(amp));
        assert!((amp - round_trip).abs() < 1e-6);

        assert_eq!(amplitude_to_db(1.0), 0.0);
        assert_eq!(amplitude_to_db(0.0), -100.0);
    }

    /// The default configuration exposes the documented FFT/hop/window values.
    #[test]
    fn test_analysis_config() {
        let AnalysisConfig {
            fft_size,
            hop_size,
            window_type,
            ..
        } = AnalysisConfig::default();

        assert_eq!(fft_size, 2048);
        assert_eq!(hop_size, 512);
        assert_eq!(window_type, WindowType::Hann);
    }
}