use crate::pitch::{PitchResult, PitchTracker, VibratoResult};
use crate::{AnalysisConfig, AnalysisError, Result};
/// Outcome of [`SingingAnalyzer::detect`] for one buffer of samples.
#[derive(Debug, Clone)]
pub struct SingingDetectionResult {
/// `true` when `voicing_continuity > 0.55` and `pitch_stability > 0.45`.
pub is_singing: bool,
/// Heuristic score in `[0, 1]` built from voicing continuity, pitch stability
/// and a small bonus when vibrato is present.
pub confidence: f32,
/// Pitch stability score in `[0, 1]`; higher means a smaller relative spread
/// of the confidently tracked F0 estimates.
pub pitch_stability: f32,
/// Voicing rate reported by the pitch tracker for this buffer.
pub voicing_continuity: f32,
/// Copy of `vibrato.present`, exposed for convenience.
pub has_vibrato: bool,
/// Full vibrato analysis returned by `crate::pitch::detect_vibrato`.
pub vibrato: VibratoResult,
/// Mean fundamental frequency reported by the pitch tracker
/// (presumably in Hz — confirm against `PitchResult`).
pub mean_f0: f32,
}
/// Quality scores produced by [`SingingAnalyzer::assess_quality`].
#[derive(Debug, Clone)]
pub struct SingingQuality {
/// Weighted sum of the component scores (intonation 0.3, vibrato regularity 0.2,
/// pitch stability 0.25, tonal clarity 0.15, dynamic consistency 0.1),
/// limited to `[0, 1]`.
pub overall: f32,
/// Share of confidently tracked pitch frames that are judged to be in tune.
pub intonation: f32,
/// Score derived from the vibrato rate and extent bands plus pitch stability;
/// `0.5` when no vibrato was detected.
pub vibrato_regularity: f32,
/// Pitch stability score in `[0, 1]`; higher means a smaller relative spread
/// of the confidently tracked F0 estimates.
pub pitch_stability: f32,
/// Score in `[0, 1]` derived from the autocorrelation of the signal at a lag of
/// one pitch period, mapped through a dB scale where 30 dB or more gives `1.0`.
pub tonal_clarity: f32,
/// Score in `[0, 1]` derived from the coefficient of variation of the
/// per-hop RMS level; higher means a steadier level.
pub dynamic_consistency: f32,
}
/// Detects singing and rates singing quality for buffers of audio samples,
/// using a [`PitchTracker`] built from the same configuration.
pub struct SingingAnalyzer {
/// Analysis settings: `fft_size` is the minimum accepted buffer length, and
/// `hop_size` is passed to vibrato detection and used as the RMS frame length.
config: AnalysisConfig,
/// Pitch tracker constructed from a clone of `config`.
pitch_tracker: PitchTracker,
}
impl SingingAnalyzer {
    /// Builds an analyzer; the internal pitch tracker receives its own clone of `config`.
    #[must_use]
    pub fn new(config: AnalysisConfig) -> Self {
        Self {
            pitch_tracker: PitchTracker::new(config.clone()),
            config,
        }
    }

    /// Rejects buffers shorter than `config.fft_size`.
    fn ensure_min_samples(&self, samples: &[f32]) -> Result<()> {
        let needed = self.config.fft_size;
        let got = samples.len();
        if got < needed {
            Err(AnalysisError::InsufficientSamples { needed, got })
        } else {
            Ok(())
        }
    }

    /// Decides whether `samples` contain singing and reports how confident that decision is.
    ///
    /// # Errors
    ///
    /// Returns [`AnalysisError::InsufficientSamples`] when fewer than `config.fft_size`
    /// samples are supplied, and propagates any error from the pitch tracker.
    pub fn detect(&self, samples: &[f32], sample_rate: f32) -> Result<SingingDetectionResult> {
        self.ensure_min_samples(samples)?;

        let track = self.pitch_tracker.track(samples, sample_rate)?;
        let vibrato = crate::pitch::detect_vibrato(&track, self.config.hop_size, sample_rate);
        let pitch_stability = compute_pitch_stability(&track);
        let voicing_continuity = track.voicing_rate;

        // Each term is zero below its floor (0.5 voicing, 0.3 stability) and reaches 1.0
        // at a raw value of 1.0; vibrato contributes a flat bonus.
        let voicing_term = (voicing_continuity - 0.5).max(0.0) / 0.5;
        let stability_term = (pitch_stability - 0.3).max(0.0) / 0.7;
        let vibrato_term = if vibrato.present { 0.1_f32 } else { 0.0 };
        let weighted = voicing_term * 0.5 + stability_term * 0.4 + vibrato_term;
        let confidence = weighted.min(1.0).max(0.0);

        let is_singing = voicing_continuity > 0.55 && pitch_stability > 0.45;

        Ok(SingingDetectionResult {
            is_singing,
            confidence,
            pitch_stability,
            voicing_continuity,
            has_vibrato: vibrato.present,
            vibrato,
            mean_f0: track.mean_f0,
        })
    }

    /// Scores the singing quality of `samples` along several independent dimensions.
    ///
    /// # Errors
    ///
    /// Returns [`AnalysisError::InsufficientSamples`] when fewer than `config.fft_size`
    /// samples are supplied, and propagates any error from the pitch tracker.
    pub fn assess_quality(&self, samples: &[f32], sample_rate: f32) -> Result<SingingQuality> {
        self.ensure_min_samples(samples)?;

        let hop_size = self.config.hop_size;
        let track = self.pitch_tracker.track(samples, sample_rate)?;
        let vibrato = crate::pitch::detect_vibrato(&track, hop_size, sample_rate);

        let pitch_stability = compute_pitch_stability(&track);
        let intonation = compute_intonation_score(&track);
        let vibrato_regularity = compute_vibrato_regularity(&vibrato, &track);
        let tonal_clarity = compute_tonal_clarity(samples, sample_rate, track.mean_f0);
        let dynamic_consistency = compute_dynamic_consistency(samples, hop_size);

        // The weights sum to 1.0; the final min/max keeps the result inside [0, 1].
        let weighted = intonation * 0.3
            + vibrato_regularity * 0.2
            + pitch_stability * 0.25
            + tonal_clarity * 0.15
            + dynamic_consistency * 0.1;
        let overall = weighted.min(1.0).max(0.0);

        Ok(SingingQuality {
            overall,
            intonation,
            vibrato_regularity,
            pitch_stability,
            tonal_clarity,
            dynamic_consistency,
        })
    }
}
fn compute_pitch_stability(pitch_result: &PitchResult) -> f32 {
let voiced: Vec<f32> = pitch_result
.estimates
.iter()
.zip(&pitch_result.confidences)
.filter(|(_, &c)| c > 0.5)
.map(|(&f, _)| f)
.collect();
if voiced.len() < 2 {
return 0.0;
}
let mean = voiced.iter().sum::<f32>() / voiced.len() as f32;
if mean <= 0.0 {
return 0.0;
}
let variance = voiced.iter().map(|&f| (f - mean).powi(2)).sum::<f32>() / voiced.len() as f32;
let std_dev = variance.sqrt();
let cv = std_dev / mean;
(1.0 - (cv * 5.0).min(1.0)).max(0.0)
}
fn compute_intonation_score(pitch_result: &PitchResult) -> f32 {
let voiced: Vec<f32> = pitch_result
.estimates
.iter()
.zip(&pitch_result.confidences)
.filter(|(_, &c)| c > 0.5)
.map(|(&f, _)| f)
.collect();
if voiced.is_empty() {
return 0.0;
}
let in_tune: usize = voiced
.iter()
.filter(|&&f| {
if f <= 0.0 {
return false;
}
let midi_float = 69.0 + 12.0 * (f / 440.0_f32).log2();
let nearest = midi_float.round();
let cents = (midi_float - nearest).abs() * 100.0;
cents <= 50.0
})
.count();
in_tune as f32 / voiced.len() as f32
}
/// Rates the detected vibrato, returning a neutral 0.5 when none is present.
///
/// Rate and extent are each banded into 1.0 (inside the inner range), 0.7
/// (inside the outer range) or 0.3 (outside both). The inner/outer ranges are
/// 5.0–7.5 / 4.0–9.0 for the rate and 50–150 / 30–200 for the extent. The two
/// band scores are weighted 0.4 each and pitch stability 0.2; the result is
/// capped at 1.0.
fn compute_vibrato_regularity(vibrato: &VibratoResult, pitch_result: &PitchResult) -> f32 {
    if !vibrato.present {
        return 0.5;
    }

    let rate_score: f32 = match vibrato.rate {
        rate if (5.0..=7.5).contains(&rate) => 1.0,
        rate if (4.0..=9.0).contains(&rate) => 0.7,
        _ => 0.3,
    };
    let extent_score: f32 = match vibrato.extent {
        extent if (50.0..=150.0).contains(&extent) => 1.0,
        extent if (30.0..=200.0).contains(&extent) => 0.7,
        _ => 0.3,
    };
    let stability = compute_pitch_stability(pitch_result);

    let blended = rate_score * 0.4 + extent_score * 0.4 + stability * 0.2;
    blended.min(1.0)
}
/// Estimates how periodic `samples` are at the pitch `f0`, as a score in `[0, 1]`.
///
/// The signal is correlated with itself at a lag of one pitch period
/// (`sample_rate / f0` samples, truncated) and normalised by the energy of the
/// leading segment. The resulting ratio `r` is limited to `[0, 0.9999]` and
/// converted to `10 * log10(r / (1 - r))` dB; 30 dB or more maps to 1.0 and
/// 0 dB or less maps to 0.0.
///
/// Returns 0.0 when `f0` is not positive, the buffer is empty or shorter than
/// two periods, the period truncates to zero, the leading segment has no
/// energy, or the samples contain NaN.
fn compute_tonal_clarity(samples: &[f32], sample_rate: f32, f0: f32) -> f32 {
    if f0 <= 0.0 || samples.is_empty() {
        return 0.0;
    }

    // Float-to-integer `as` saturates (and maps NaN to 0), so a tiny `f0` can
    // yield `usize::MAX` here.
    let period = (sample_rate / f0) as usize;

    // Compare against half the length rather than doubling `period`: doubling a
    // saturated period overflows, which panics in debug builds.
    if period == 0 || period > samples.len() / 2 {
        return 0.0;
    }

    let (cross, energy) = samples
        .iter()
        .zip(&samples[period..])
        .fold((0.0_f32, 0.0_f32), |(cross, energy), (&now, &later)| {
            (cross + now * later, energy + now * now)
        });

    if energy.is_nan() || energy <= 0.0 {
        return 0.0;
    }

    let ratio = cross / energy;
    if ratio.is_nan() {
        // Without this guard a NaN would pass through `clamp` and reach the caller.
        return 0.0;
    }

    let r = ratio.clamp(0.0, 0.9999);
    let hnr_db = 10.0 * (r / (1.0 - r)).log10();
    (hnr_db / 30.0).clamp(0.0, 1.0)
}
/// Scores how steady the signal level is across frames, in `[0, 1]`.
///
/// The buffer is cut into consecutive frames of `hop_size` samples (a
/// `hop_size` of 0 is treated as 1) and the RMS of each full frame is taken.
/// The score is `1 - 3 * cv`, floored at 0, where `cv` is the coefficient of
/// variation of those RMS values. A trailing partial frame is ignored so that
/// a handful of leftover samples cannot outweigh the full frames.
///
/// Returns 0.0 when fewer than two full frames fit or the mean RMS is not positive.
fn compute_dynamic_consistency(samples: &[f32], hop_size: usize) -> f32 {
    let hop = hop_size.max(1);

    // Division instead of `hop_size * 2`, which overflows for very large hop sizes.
    if samples.len() / hop < 2 {
        return 0.0;
    }

    let rms_values: Vec<f32> = samples
        .chunks_exact(hop)
        .map(|frame| {
            let energy: f32 = frame.iter().map(|&x| x * x).sum();
            (energy / frame.len() as f32).sqrt()
        })
        .collect();

    let frame_count = rms_values.len() as f32;
    let mean = rms_values.iter().sum::<f32>() / frame_count;
    if mean <= 0.0 {
        return 0.0;
    }

    let variance = rms_values
        .iter()
        .map(|&rms| (rms - mean).powi(2))
        .sum::<f32>()
        / frame_count;
    let cv = variance.sqrt() / mean;

    // A relative level spread of one third or more maps to the minimum score.
    (1.0 - (cv * 3.0).min(1.0)).max(0.0)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::f32::consts::PI;

    /// Generates `duration_s` seconds of a 0.8-amplitude sine at `freq`, sampled at `sr`.
    fn make_sustained_sine(freq: f32, duration_s: f32, sr: f32) -> Vec<f32> {
        let sample_count = (sr * duration_s) as usize;
        let mut wave = Vec::with_capacity(sample_count);
        for i in 0..sample_count {
            wave.push((2.0 * PI * freq * i as f32 / sr).sin() * 0.8);
        }
        wave
    }

    /// Builds a fully voiced pitch track holding `frames` identical estimates at `freq`.
    fn steady_pitch(freq: f32, frames: usize) -> crate::pitch::PitchResult {
        crate::pitch::PitchResult {
            estimates: vec![freq; frames],
            confidences: vec![0.9_f32; frames],
            mean_f0: freq,
            voicing_rate: 1.0,
        }
    }

    #[test]
    fn test_singing_detector_on_sine_wave() {
        let analyzer = SingingAnalyzer::new(AnalysisConfig::default());
        let samples = make_sustained_sine(220.0, 2.0, 44100.0);

        let result = analyzer.detect(&samples, 44100.0);
        assert!(result.is_ok());

        let r = result.expect("should succeed");
        let unit = 0.0_f32..=1.0_f32;
        assert!(unit.contains(&r.confidence));
        assert!(unit.contains(&r.pitch_stability));
    }

    #[test]
    fn test_quality_on_pure_sine() {
        let analyzer = SingingAnalyzer::new(AnalysisConfig::default());
        let samples = make_sustained_sine(440.0, 2.0, 44100.0);

        let quality = analyzer.assess_quality(&samples, 44100.0);
        assert!(quality.is_ok());

        let q = quality.expect("should succeed");
        let unit = 0.0_f32..=1.0_f32;
        assert!(unit.contains(&q.overall));
        assert!(unit.contains(&q.intonation));
        assert!(unit.contains(&q.dynamic_consistency));
    }

    #[test]
    fn test_singing_detector_insufficient_samples() {
        let analyzer = SingingAnalyzer::new(AnalysisConfig::default());
        let too_short = [0.0_f32; 10];
        assert!(analyzer.detect(&too_short, 44100.0).is_err());
    }

    #[test]
    fn test_pitch_stability_constant_pitch() {
        let pr = steady_pitch(440.0, 50);
        let stab = compute_pitch_stability(&pr);
        assert!(stab > 0.9, "Constant pitch should have very high stability: {stab}");
    }

    #[test]
    fn test_intonation_a440() {
        let pr = steady_pitch(440.0, 30);
        let score = compute_intonation_score(&pr);
        assert!(score > 0.9, "440 Hz should have near-perfect intonation: {score}");
    }

    #[test]
    fn test_dynamic_consistency_constant_amplitude() {
        let samples = vec![0.5_f32; 4096];
        let score = compute_dynamic_consistency(&samples, 512);
        assert!(score > 0.8, "Constant amplitude should have high dynamic consistency: {score}");
    }
}