oximedia_mir/
lib.rs

1//! Music Information Retrieval (MIR) system for `OxiMedia`.
2//!
3//! This crate provides comprehensive music analysis capabilities for audio content:
4//!
5//! # Tempo and Beat Analysis
6//!
7//! - **Tempo Detection** - BPM detection using autocorrelation and comb filtering
8//! - **Beat Tracking** - Beat and downbeat detection with dynamic programming
9//! - **Onset Detection** - Transient detection using spectral flux and HFC
10//!
11//! # Tonal Analysis
12//!
13//! - **Key Detection** - Musical key detection (Krumhansl-Schmuckler algorithm)
14//! - **Chord Recognition** - Chord progression analysis using chroma features
15//! - **Melody Extraction** - Dominant melody line extraction
16//! - **Harmonic Analysis** - Harmonic-percussive separation
17//!
18//! # Structure Analysis
19//!
20//! - **Structural Segmentation** - Section boundary detection
21//! - **Self-Similarity Analysis** - Pattern and repetition detection
22//! - **Section Labeling** - Intro, verse, chorus, bridge identification
23//!
24//! # High-Level Features
25//!
26//! - **Genre Classification** - Genre detection from audio features
27//! - **Mood Detection** - Valence and arousal estimation
28//! - **Loudness Analysis** - Integrated loudness and dynamics
29//!
30//! # Low-Level Features
31//!
32//! - **Spectral Features** - Centroid, rolloff, flux, contrast
33//! - **Rhythm Features** - Rhythm patterns and complexity
34//! - **Pitch Features** - Pitch class profiles and chromagrams
35//!
36//! # Usage
37//!
38//! ```no_run
39//! use oximedia_mir::{MirAnalyzer, MirConfig, FeatureSet};
40//!
41//! // Create analyzer with default configuration
42//! let config = MirConfig::default();
43//! let analyzer = MirAnalyzer::new(config);
44//!
45//! // Analyze audio samples (f32, mono or stereo)
46//! let samples = vec![0.0_f32; 44100]; // 1 second of silence
47//! let sample_rate = 44100.0;
48//!
49//! // Perform analysis
50//! let result = analyzer.analyze(&samples, sample_rate)?;
51//!
52//! // Access results
53//! if let Some(ref tempo) = result.tempo {
54//!     println!("Tempo: {:.1} BPM (confidence: {:.2})", tempo.bpm, tempo.confidence);
55//! }
56//! if let Some(ref key) = result.key {
57//!     println!("Key: {} (confidence: {:.2})", key.key, key.confidence);
58//! }
59//! if let Some(ref genre) = result.genre {
60//!     println!("Genre: {} (confidence: {:.2})", genre.top_genre().0, genre.top_genre().1);
61//! }
62//!
63//! # Ok::<(), oximedia_mir::MirError>(())
64//! ```
65//!
66//! # Patent-Free Implementation
67//!
68//! All algorithms are implemented using patent-free methods:
69//! - Autocorrelation-based tempo detection
70//! - Chroma-based chord recognition
71//! - Spectral-based onset detection
72//! - Krumhansl-Schmuckler key detection
73//!
74//! # Real-Time Capable
75//!
76//! Many features support frame-by-frame processing for real-time applications.
77
78#![warn(missing_docs)]
79#![allow(clippy::cast_precision_loss)]
80#![allow(clippy::cast_possible_truncation)]
81#![allow(clippy::cast_sign_loss)]
82#![allow(clippy::similar_names)]
83#![allow(clippy::many_single_char_names)]
84#![allow(clippy::unreadable_literal)]
85#![allow(clippy::let_and_return)]
86#![allow(clippy::if_same_then_else)]
87#![allow(clippy::trivially_copy_pass_by_ref)]
88#![allow(clippy::unused_self)]
89#![allow(clippy::module_name_repetitions)]
90#![allow(dead_code)]
91
92pub mod audio_events;
93pub mod audio_features;
94pub mod beat;
95pub mod beat_tracker;
96pub mod beat_tracking;
97pub mod chord;
98pub mod chord_recognition;
99pub mod chorus_detect;
100pub mod cover_detect;
101pub mod dynamic_range;
102pub mod energy_contour;
103pub mod fade_detect;
104pub mod fingerprint;
105pub mod genre;
106pub mod genre_classifier;
107pub mod genre_classify;
108pub mod harmonic;
109pub mod harmonic_analysis;
110pub mod instrument;
111pub mod instrument_detection;
112pub mod key;
113pub mod key_detection;
114pub mod loudness;
115pub mod melody;
116pub mod midi;
117pub mod mir_feature;
118pub mod mood;
119pub mod mood_detection;
120pub mod music_summary;
121pub mod onset_strength;
122pub mod pitch_key;
123pub mod pitch_track;
124pub mod playlist;
125pub mod playlist_gen;
126pub mod rhythm;
127pub mod rhythm_pattern;
128pub mod segmentation;
129pub mod similarity;
130pub mod source_separation;
131pub mod spectral;
132pub mod spectral_contrast;
133pub mod spectral_features;
134pub mod streaming;
135pub mod structure;
136pub mod structure_analysis;
137pub mod tempo;
138pub mod tempo_map;
139pub mod tuning_detect;
140pub mod vocal_detect;
141
142/// 12-bin chroma vector computation (pitch-class profiling, chord/key analysis).
143pub mod chromagram;
144
145/// Cached chromagram that amortises FFT cost across multiple consumers.
146pub mod chroma_cache;
147
148/// Audio watermark embedding (spread-spectrum LSB steganography).
149pub mod watermark;
150
151/// Audio watermark detection and extraction.
152pub mod watermark_detect;
153
154/// Cosine, Earth-mover, and multi-modal audio similarity measures.
155pub mod audio_similarity;
156
157/// Visual cover-art feature extraction (dominant colours, edge histograms).
158pub mod cover_art_features;
159
160/// DJ workflow features: Camelot wheel, beat-matching, compatibility scoring.
161pub mod dj_features;
162
163/// Neural-network-free rule-based genre classification (spectral features).
164pub mod genre_classify_new;
165
166/// Harmonic-percussive source separation (median-filter HPSS).
167pub mod harmonic_percussive;
168
169/// Inharmonicity, HNR, and THD from the harmonic series.
170pub mod harmonic_spectral;
171
172/// Instrument family and voice classification.
173pub mod instrument_classifier;
174
175/// Locality-sensitive hashing for fast approximate audio nearest-neighbour.
176pub mod lsh_similarity;
177
178/// Forced alignment of lyrics text to detected onset timestamps.
179pub mod lyrics_align;
180
181/// Energy/zero-crossing melody extraction and contour shape analysis.
182pub mod melody_extract;
183
184/// Harmonic-salience Viterbi melody extractor with vibrato detection.
185pub mod melody_extractor;
186
187/// Multi-stem analysis with per-stem feature extraction.
188pub mod multitrack;
189
190/// Onset peak picking with configurable threshold and minimum interval.
191pub mod onset_peak;
192
193/// Structural section segmentation via novelty-curve self-similarity.
194pub mod section_segmenter;
195
196/// Brute-force and LSH similarity search over fingerprint indices.
197pub mod similarity_search;
198
199/// Sub-genre tagging within broad genre families.
200pub mod subgenre;
201
202/// Tempo stability analysis (variance, drift, class).
203pub mod tempo_stability;
204
205/// Waveform thumbnail generation for audio browsers.
206pub mod thumbnail;
207
208#[cfg(feature = "onnx")]
209pub mod ml;
210
211mod error;
212mod types;
213mod utils;
214
215pub use error::{MirError, MirResult};
216
217pub use midi::{AudioToMidi, AudioToMidiConfig, MidiNote, MidiTempo, MidiTranscription};
218#[cfg(feature = "onnx")]
219pub use ml::{
220    activate_and_rank, apply_activation, MusicTagger, MusicTags, TagActivation, TagActivationScore,
221    DEFAULT_TOP_K,
222};
223pub use streaming::{
224    StreamingAnalysisSummary, StreamingAnalyzer, StreamingConfig, StreamingFrameFeatures,
225};
226pub use types::{
227    AnalysisResult, BeatResult, ChordResult, FeatureSet, GenreResult, HarmonicResult, KeyResult,
228    LoudnessResult, MelodyResult, MoodResult, RhythmResult, SpectralResult, StructureResult,
229    TempoResult,
230};
231
232use rayon::prelude::*;
233use std::collections::HashMap;
234
/// Configuration for MIR analysis.
///
/// Controls the framing parameters, the tempo search range, which analysis
/// branches [`MirAnalyzer::analyze`] runs, and the confidence thresholds
/// applied to its results. [`MirConfig::default`] enables every branch and
/// sets every threshold to `0.0`.
#[derive(Debug, Clone, PartialEq)]
#[allow(clippy::struct_excessive_bools)]
pub struct MirConfig {
    /// Window size for frame-based analysis (samples).
    pub window_size: usize,

    /// Hop size for frame-based analysis (samples).
    pub hop_size: usize,

    /// Minimum tempo to detect (BPM).
    pub min_tempo: f32,

    /// Maximum tempo to detect (BPM).
    pub max_tempo: f32,

    /// Enable tempo detection and beat tracking.
    ///
    /// `MirAnalyzer::analyze` gates both stages on this single flag: when it
    /// is `false`, neither a tempo nor a beat result is produced.
    pub enable_beat_tracking: bool,

    /// Enable key detection.
    pub enable_key_detection: bool,

    /// Enable chord recognition.
    pub enable_chord_recognition: bool,

    /// Enable melody extraction.
    pub enable_melody_extraction: bool,

    /// Enable structure analysis.
    pub enable_structure_analysis: bool,

    /// Enable genre classification.
    pub enable_genre_classification: bool,

    /// Enable mood detection.
    pub enable_mood_detection: bool,

    /// Enable spectral features.
    pub enable_spectral_features: bool,

    /// Enable rhythm features.
    pub enable_rhythm_features: bool,

    /// Enable harmonic analysis.
    pub enable_harmonic_analysis: bool,

    /// Confidence threshold for tempo detection (0.0 to 1.0).
    /// Results below this threshold are discarded (set to `None`).
    pub confidence_threshold_tempo: f32,

    /// Confidence threshold for key detection (0.0 to 1.0).
    /// Results below this threshold are discarded (set to `None`).
    pub confidence_threshold_key: f32,

    /// Confidence threshold for chord recognition (0.0 to 1.0).
    /// Chords below this threshold are filtered from the result; the filter
    /// is only applied when the threshold is greater than `0.0`.
    pub confidence_threshold_chord: f32,

    /// Confidence threshold for genre classification (0.0 to 1.0).
    /// Compared against the result's `top_genre_confidence`; results below
    /// the threshold are discarded (set to `None`).
    pub confidence_threshold_genre: f32,

    /// Confidence threshold for mood detection (0.0 to 1.0).
    /// Compared against the result's `intensity`; results below the
    /// threshold are discarded (set to `None`).
    pub confidence_threshold_mood: f32,

    /// Channel layout hint used when reducing the input to mono.
    ///
    /// Set to 2 to force `MirAnalyzer::analyze` to treat the input as
    /// interleaved stereo and average each L/R pair. Any other value
    /// (including the default of 1) lets the analyzer decide with a
    /// channel-correlation heuristic whether the input is interleaved stereo.
    pub num_channels: u8,
}
302
303impl Default for MirConfig {
304    fn default() -> Self {
305        Self {
306            window_size: 2048,
307            hop_size: 512,
308            min_tempo: 60.0,
309            max_tempo: 200.0,
310            enable_beat_tracking: true,
311            enable_key_detection: true,
312            enable_chord_recognition: true,
313            enable_melody_extraction: true,
314            enable_structure_analysis: true,
315            enable_genre_classification: true,
316            enable_mood_detection: true,
317            enable_spectral_features: true,
318            enable_rhythm_features: true,
319            enable_harmonic_analysis: true,
320            confidence_threshold_tempo: 0.0,
321            confidence_threshold_key: 0.0,
322            confidence_threshold_chord: 0.0,
323            confidence_threshold_genre: 0.0,
324            confidence_threshold_mood: 0.0,
325            num_channels: 1,
326        }
327    }
328}
329
330/// Main MIR analyzer.
331pub struct MirAnalyzer {
332    config: MirConfig,
333}
334
335impl MirAnalyzer {
336    /// Create a new MIR analyzer with the given configuration.
337    #[must_use]
338    pub fn new(config: MirConfig) -> Self {
339        Self { config }
340    }
341
342    /// Analyze audio samples and extract all enabled features.
343    ///
344    /// Independent analysis branches (key, chord, melody, structure, genre,
345    /// mood, spectral, rhythm, harmonic) are executed in parallel using rayon.
346    /// Tempo detection is run first because beat tracking depends on its result.
347    ///
348    /// # Arguments
349    ///
350    /// * `samples` - Audio samples (f32, mono or interleaved stereo)
351    /// * `sample_rate` - Sample rate in Hz
352    ///
353    /// # Returns
354    ///
355    /// Complete analysis results including all enabled features.
356    ///
357    /// # Errors
358    ///
359    /// Returns error if analysis fails.
360    #[allow(clippy::too_many_lines)]
361    #[allow(clippy::cast_precision_loss)]
362    pub fn analyze(&self, samples: &[f32], sample_rate: f32) -> MirResult<AnalysisResult> {
363        // Convert to mono if stereo (force conversion when num_channels == 2)
364        let mono = if self.config.num_channels == 2 {
365            // Forced stereo-to-mono: average interleaved L/R pairs
366            let half = samples.len() / 2;
367            let mut out = Vec::with_capacity(half);
368            for i in 0..half {
369                out.push((samples[i * 2] + samples[i * 2 + 1]) * 0.5);
370            }
371            out
372        } else {
373            self.to_mono(samples)
374        };
375
376        // ── Tempo is needed for beat tracking so must run first ───────────
377        let tempo = if self.config.enable_beat_tracking {
378            let detector = tempo::TempoDetector::new(
379                sample_rate,
380                self.config.min_tempo,
381                self.config.max_tempo,
382            );
383            Some(detector.detect(&mono)?)
384        } else {
385            None
386        };
387
388        // Beat tracking depends on tempo result
389        let beat = if self.config.enable_beat_tracking {
390            let tracker = beat::BeatTracker::new(sample_rate, self.config.hop_size);
391            Some(tracker.track(&mono, tempo.as_ref())?)
392        } else {
393            None
394        };
395
396        // ── Parallel branch: 8 independent analyses ───────────────────────
397        //
398        // We represent each branch as a distinct index in a flat array so that
399        // rayon can schedule them across the thread pool without needing to
400        // unify heterogeneous closures into a single type.
401        //
402        //  0 = key          4 = genre
403        //  1 = chord        5 = mood
404        //  2 = melody       6 = spectral
405        //  3 = structure    7 = rhythm
406        //  8 = harmonic
407        //
408        // Each branch returns `Result<Option<BranchResult>, MirError>` encoded
409        // as a `BranchOutput` enum to allow a single parallel collect pass.
410
411        #[allow(clippy::large_enum_variant)]
412        enum BranchOutput {
413            Key(MirResult<Option<KeyResult>>),
414            Chord(MirResult<Option<ChordResult>>),
415            Melody(MirResult<Option<MelodyResult>>),
416            Structure(MirResult<Option<StructureResult>>),
417            Genre(MirResult<Option<GenreResult>>),
418            Mood(MirResult<Option<MoodResult>>),
419            Spectral(MirResult<Option<SpectralResult>>),
420            Rhythm(MirResult<Option<RhythmResult>>),
421            Harmonic(MirResult<Option<HarmonicResult>>),
422        }
423
424        let cfg = &self.config;
425        let mono_ref: &[f32] = &mono;
426
427        let results: Vec<BranchOutput> = (0_u8..9)
428            .into_par_iter()
429            .map(|branch| match branch {
430                0 => BranchOutput::Key(if cfg.enable_key_detection {
431                    let det = key::KeyDetector::new(sample_rate, cfg.window_size);
432                    det.detect(mono_ref).map(Some)
433                } else {
434                    Ok(None)
435                }),
436                1 => BranchOutput::Chord(if cfg.enable_chord_recognition {
437                    let rec =
438                        chord::ChordRecognizer::new(sample_rate, cfg.window_size, cfg.hop_size);
439                    rec.recognize(mono_ref).map(Some)
440                } else {
441                    Ok(None)
442                }),
443                2 => BranchOutput::Melody(if cfg.enable_melody_extraction {
444                    let ext =
445                        melody::MelodyExtractor::new(sample_rate, cfg.window_size, cfg.hop_size);
446                    ext.extract(mono_ref).map(Some)
447                } else {
448                    Ok(None)
449                }),
450                3 => BranchOutput::Structure(if cfg.enable_structure_analysis {
451                    let ana = structure::StructureAnalyzer::new(
452                        sample_rate,
453                        cfg.window_size,
454                        cfg.hop_size,
455                    );
456                    ana.analyze(mono_ref).map(Some)
457                } else {
458                    Ok(None)
459                }),
460                4 => BranchOutput::Genre(if cfg.enable_genre_classification {
461                    let cls = genre::GenreClassifier::new(sample_rate);
462                    cls.classify(mono_ref).map(Some)
463                } else {
464                    Ok(None)
465                }),
466                5 => BranchOutput::Mood(if cfg.enable_mood_detection {
467                    let det = mood::MoodDetector::new(sample_rate);
468                    det.detect(mono_ref).map(Some)
469                } else {
470                    Ok(None)
471                }),
472                6 => BranchOutput::Spectral(if cfg.enable_spectral_features {
473                    let ana =
474                        spectral::SpectralAnalyzer::new(sample_rate, cfg.window_size, cfg.hop_size);
475                    ana.analyze(mono_ref).map(Some)
476                } else {
477                    Ok(None)
478                }),
479                7 => BranchOutput::Rhythm(if cfg.enable_rhythm_features {
480                    let ana = rhythm::RhythmAnalyzer::new(sample_rate, cfg.hop_size);
481                    ana.analyze(mono_ref).map(Some)
482                } else {
483                    Ok(None)
484                }),
485                _ => BranchOutput::Harmonic(if cfg.enable_harmonic_analysis {
486                    let ana =
487                        harmonic::HarmonicAnalyzer::new(sample_rate, cfg.window_size, cfg.hop_size);
488                    ana.analyze(mono_ref).map(Some)
489                } else {
490                    Ok(None)
491                }),
492            })
493            .collect();
494
495        // ── Unpack parallel results ────────────────────────────────────────
496        let mut key_res: Option<KeyResult> = None;
497        let mut chord_res: Option<ChordResult> = None;
498        let mut melody_res: Option<MelodyResult> = None;
499        let mut structure_res: Option<StructureResult> = None;
500        let mut genre_res: Option<GenreResult> = None;
501        let mut mood_res: Option<MoodResult> = None;
502        let mut spectral_res: Option<SpectralResult> = None;
503        let mut rhythm_res: Option<RhythmResult> = None;
504        let mut harmonic_res: Option<HarmonicResult> = None;
505
506        for output in results {
507            match output {
508                BranchOutput::Key(r) => key_res = r?,
509                BranchOutput::Chord(r) => chord_res = r?,
510                BranchOutput::Melody(r) => melody_res = r?,
511                BranchOutput::Structure(r) => structure_res = r?,
512                BranchOutput::Genre(r) => genre_res = r?,
513                BranchOutput::Mood(r) => mood_res = r?,
514                BranchOutput::Spectral(r) => spectral_res = r?,
515                BranchOutput::Rhythm(r) => rhythm_res = r?,
516                BranchOutput::Harmonic(r) => harmonic_res = r?,
517            }
518        }
519
520        // ── Apply confidence thresholds -- discard low-confidence results ─
521        let tempo = tempo.and_then(|t| {
522            if t.confidence >= self.config.confidence_threshold_tempo {
523                Some(t)
524            } else {
525                None
526            }
527        });
528
529        let key = key_res.and_then(|k| {
530            if k.confidence >= self.config.confidence_threshold_key {
531                Some(k)
532            } else {
533                None
534            }
535        });
536
537        let chord = chord_res.map(|mut c| {
538            if self.config.confidence_threshold_chord > 0.0 {
539                c.chords
540                    .retain(|ch| ch.confidence >= self.config.confidence_threshold_chord);
541            }
542            c
543        });
544
545        let genre = genre_res.and_then(|g| {
546            if g.top_genre_confidence >= self.config.confidence_threshold_genre {
547                Some(g)
548            } else {
549                None
550            }
551        });
552
553        let mood = mood_res.and_then(|m| {
554            if m.intensity >= self.config.confidence_threshold_mood {
555                Some(m)
556            } else {
557                None
558            }
559        });
560
561        Ok(AnalysisResult {
562            tempo,
563            beat,
564            key,
565            chord,
566            melody: melody_res,
567            structure: structure_res,
568            genre,
569            mood,
570            spectral: spectral_res,
571            rhythm: rhythm_res,
572            harmonic: harmonic_res,
573            sample_rate,
574            duration: mono.len() as f32 / sample_rate,
575        })
576    }
577
578    /// Convert stereo to mono by averaging channels.
579    ///
580    /// Detects whether the input is stereo (interleaved L/R pairs) by checking
581    /// if the sample count is even and the left/right channels show sufficient
582    /// decorrelation. Falls back to treating the signal as mono if not stereo.
583    fn to_mono(&self, samples: &[f32]) -> Vec<f32> {
584        if samples.len() < 4 || samples.len() % 2 != 0 {
585            return samples.to_vec();
586        }
587
588        // Heuristic: check if interleaved stereo by computing L/R correlation.
589        // True stereo signals typically have decorrelated channels.
590        let half = samples.len() / 2;
591        let mut sum_l = 0.0_f64;
592        let mut sum_r = 0.0_f64;
593        let mut sum_ll = 0.0_f64;
594        let mut sum_rr = 0.0_f64;
595        let mut sum_lr = 0.0_f64;
596
597        // Sample up to 4096 pairs to keep this fast
598        let check_count = half.min(4096);
599        for i in 0..check_count {
600            let l = f64::from(samples[i * 2]);
601            let r = f64::from(samples[i * 2 + 1]);
602            sum_l += l;
603            sum_r += r;
604            sum_ll += l * l;
605            sum_rr += r * r;
606            sum_lr += l * r;
607        }
608
609        let n = check_count as f64;
610        let var_l = (sum_ll / n) - (sum_l / n).powi(2);
611        let var_r = (sum_rr / n) - (sum_r / n).powi(2);
612
613        // If both channels have near-zero variance, treat as mono (silence or DC)
614        if var_l < 1e-10 && var_r < 1e-10 {
615            return samples.to_vec();
616        }
617
618        // Pearson correlation
619        let denom = (var_l * var_r).sqrt();
620        let correlation = if denom > 1e-12 {
621            ((sum_lr / n) - (sum_l / n) * (sum_r / n)) / denom
622        } else {
623            1.0 // One channel is constant => treat as mono
624        };
625
626        // If correlation is very high (> 0.98), L and R are nearly identical:
627        // the signal is likely mono data, not interleaved stereo.
628        if correlation > 0.98 {
629            return samples.to_vec();
630        }
631
632        // Down-mix interleaved stereo to mono by averaging L/R pairs.
633        let mut mono = Vec::with_capacity(half);
634        for i in 0..half {
635            mono.push((samples[i * 2] + samples[i * 2 + 1]) * 0.5);
636        }
637        mono
638    }
639
640    /// Extract specific feature set.
641    ///
642    /// # Errors
643    ///
644    /// Returns error if feature extraction fails.
645    pub fn extract_features(
646        &self,
647        samples: &[f32],
648        sample_rate: f32,
649        features: FeatureSet,
650    ) -> MirResult<HashMap<String, Vec<f32>>> {
651        let mono = self.to_mono(samples);
652        let mut result = HashMap::new();
653
654        if features.contains(FeatureSet::SPECTRAL) {
655            let analyzer = spectral::SpectralAnalyzer::new(
656                sample_rate,
657                self.config.window_size,
658                self.config.hop_size,
659            );
660            let spectral = analyzer.analyze(&mono)?;
661            result.insert("spectral_centroid".to_string(), spectral.centroid);
662            result.insert("spectral_rolloff".to_string(), spectral.rolloff);
663            result.insert("spectral_flux".to_string(), spectral.flux);
664        }
665
666        if features.contains(FeatureSet::RHYTHM) {
667            let analyzer = rhythm::RhythmAnalyzer::new(sample_rate, self.config.hop_size);
668            let rhythm = analyzer.analyze(&mono)?;
669            result.insert("onset_strength".to_string(), rhythm.onset_strength);
670        }
671
672        Ok(result)
673    }
674}
675
#[cfg(test)]
mod tests {
    use super::*;
    use std::f32::consts::TAU;

    /// Generate `len` samples of a unit-amplitude sine wave at `freq` Hz.
    fn sine(freq: f32, sample_rate: f32, len: usize) -> Vec<f32> {
        (0..len)
            .map(|i| {
                let t = i as f32 / sample_rate;
                (TAU * freq * t).sin()
            })
            .collect()
    }

    /// Interleave two equally long channels into an L/R/L/R... buffer.
    fn interleave(left: &[f32], right: &[f32]) -> Vec<f32> {
        left.iter()
            .zip(right)
            .flat_map(|(&l, &r)| [l, r])
            .collect()
    }

    /// Default configuration with every analysis branch switched off, so a
    /// test can enable only the branch it exercises.
    fn all_disabled() -> MirConfig {
        MirConfig {
            enable_beat_tracking: false,
            enable_key_detection: false,
            enable_chord_recognition: false,
            enable_melody_extraction: false,
            enable_structure_analysis: false,
            enable_genre_classification: false,
            enable_mood_detection: false,
            enable_spectral_features: false,
            enable_rhythm_features: false,
            enable_harmonic_analysis: false,
            ..MirConfig::default()
        }
    }

    #[test]
    fn test_mir_config_default() {
        let config = MirConfig::default();
        assert_eq!(config.window_size, 2048);
        assert_eq!(config.hop_size, 512);
        assert!(config.enable_beat_tracking);
        assert!((config.confidence_threshold_tempo - 0.0).abs() < f32::EPSILON);
        assert!((config.confidence_threshold_key - 0.0).abs() < f32::EPSILON);
        assert_eq!(config.num_channels, 1);
    }

    #[test]
    fn test_mir_analyzer_creation() {
        let config = MirConfig::default();
        let _analyzer = MirAnalyzer::new(config);
    }

    #[test]
    fn test_analyze_silence() {
        let config = MirConfig {
            enable_beat_tracking: false, // Disable beat tracking for silence test
            enable_genre_classification: false,
            enable_structure_analysis: false,
            ..MirConfig::default()
        };
        let analyzer = MirAnalyzer::new(config);
        let samples = vec![0.0_f32; 44100]; // 1 second of silence
        let result = analyzer.analyze(&samples, 44100.0);
        assert!(result.is_ok());
    }

    // ── to_mono tests ──

    #[test]
    fn test_to_mono_mono_input() {
        let analyzer = MirAnalyzer::new(MirConfig::default());
        let mono = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let result = analyzer.to_mono(&mono);
        // Odd-length input is always treated as mono
        assert_eq!(result.len(), 5);
    }

    #[test]
    fn test_to_mono_stereo_detection() {
        let analyzer = MirAnalyzer::new(MirConfig::default());
        let sr = 44100.0;
        let n = 8820; // ~200ms per channel

        // Interleaved stereo with decorrelated channels: A4 left, C#5 right.
        let stereo = interleave(&sine(440.0, sr, n), &sine(554.37, sr, n));

        let result = analyzer.to_mono(&stereo);
        // Stereo detected => halved length
        assert_eq!(result.len(), n);
        // First sample should be average of L and R
        let expected = (stereo[0] + stereo[1]) * 0.5;
        assert!((result[0] - expected).abs() < 1e-6);
    }

    #[test]
    fn test_to_mono_identical_channels_treated_as_mono() {
        let analyzer = MirAnalyzer::new(MirConfig::default());

        // Interleaved but identical channels -> correlation ~1.0 -> treat as mono
        let channel: Vec<f32> = (0..4000).map(|i| (i as f32 / 100.0).sin()).collect();
        let data = interleave(&channel, &channel);

        let result = analyzer.to_mono(&data);
        // Should keep original length (treated as mono)
        assert_eq!(result.len(), 8000);
    }

    #[test]
    fn test_to_mono_short_signal() {
        let analyzer = MirAnalyzer::new(MirConfig::default());
        let short = vec![1.0, 2.0];
        let result = analyzer.to_mono(&short);
        assert_eq!(result.len(), 2);
    }

    // ── Confidence threshold tests ──

    #[test]
    fn test_confidence_threshold_filters_tempo() {
        let config = MirConfig {
            enable_beat_tracking: true,
            confidence_threshold_tempo: 0.999, // Very high threshold
            ..all_disabled()
        };
        let analyzer = MirAnalyzer::new(config);

        // Three seconds of a pure tone: some periodic content, no clear beat.
        let sr = 44100.0;
        let signal = sine(440.0, sr, sr as usize * 3);

        let r = analyzer
            .analyze(&signal, sr)
            .expect("analysis should succeed");

        // Whether the detector clears the bar is up to the detector, but a
        // tempo that survives filtering must meet the configured threshold.
        if let Some(t) = &r.tempo {
            assert!(t.confidence >= 0.999);
        }
    }

    #[test]
    fn test_confidence_threshold_zero_keeps_all() {
        let config = MirConfig {
            enable_key_detection: true,
            confidence_threshold_key: 0.0, // Accept everything
            ..all_disabled()
        };
        let analyzer = MirAnalyzer::new(config);

        let sr = 22050.0;
        let signal = sine(261.63, sr, sr as usize * 2); // C note

        let r = analyzer.analyze(&signal, sr).expect("should succeed");
        // With threshold 0.0, key should be present
        assert!(r.key.is_some());
    }

    // ── Forced stereo conversion via num_channels ──

    #[test]
    fn test_num_channels_forced_stereo() {
        let config = MirConfig {
            num_channels: 2,
            enable_spectral_features: true,
            ..all_disabled()
        };
        let analyzer = MirAnalyzer::new(config);

        // One second of interleaved stereo: 44100 L/R pairs.
        let sr = 44100.0;
        let stereo = interleave(&sine(440.0, sr, 44100), &sine(550.0, sr, 44100));

        let r = analyzer.analyze(&stereo, sr).expect("should succeed");
        // Duration is based on the mono length: 44100 pairs / 44100 Hz = 1.0s
        assert!((r.duration - 1.0).abs() < 1e-3);
    }
}
oximedia_mir/lib.rs

oximedia_mir/
lib.rs