use rustfft::{Fft, FftPlanner, num_complex::Complex};
use std::sync::Arc;
/// Number of per-frame energy values kept in the `energy_history` ring buffer
/// that `average_energy` is averaged over.
const ENERGY_HISTORY_SIZE: usize = 90;
/// Number of recent spectral-flux values kept for the adaptive onset threshold
/// (mean + 1.5 standard deviations of this history).
const FLUX_HISTORY_SIZE: usize = 20;
/// Length of the downsampled magnitude spectra (`current_spectrum` /
/// `prev_spectrum`) used for flux computation.
const SPECTRUM_BINS: usize = 256;
/// Default FFT window length, in samples.
const FFT_SIZE: usize = 4096;
/// Number of onset timestamps remembered for tempo estimation.
const ONSET_HISTORY_SIZE: usize = 512;
/// Frame-by-frame audio feature extractor for a pre-loaded sample buffer.
///
/// Each call to `analyze_at_time` runs a Hann-windowed FFT over the samples
/// starting at the requested time and refreshes the public feature fields
/// below (band levels, spectral shape, onset/drum triggers, tempo and section
/// state). All public fields are outputs; writing to them from outside only
/// affects the next frame's smoothing.
///
/// NOTE(review): samples are indexed as `time * sample_rate`, which implies one
/// sample per frame (mono) — confirm with the code that calls `load_samples`.
pub struct AudioAnalyzer {
/// Samples to analyze, as handed to `load_samples`.
samples: Vec<f32>,
/// Sample rate in Hz; converts times to sample indices and FFT bins to Hz.
sample_rate: u32,
/// Number of samples in one analysis window (length of `fft_buffer`).
fft_size: usize,
/// `samples.len() / sample_rate`, in seconds.
total_duration: f32,
/// Level of the 20–60 Hz band for the latest frame, dB-mapped into 0..=1.
pub sub_bass: f32,
/// Level of the 60–250 Hz band for the latest frame, dB-mapped into 0..=1.
pub bass: f32,
/// Level of the 250–500 Hz band for the latest frame, dB-mapped into 0..=1.
pub low_mids: f32,
/// Level of the 500–2000 Hz band for the latest frame, dB-mapped into 0..=1.
pub mids: f32,
/// Level of the 2–4 kHz band for the latest frame, dB-mapped into 0..=1.
pub high_mids: f32,
/// Level of the 4–12 kHz band for the latest frame, dB-mapped into 0..=1.
pub highs: f32,
/// `sub_bass` smoothed with a fast attack (0.4) and slow release (0.08).
pub smoothed_sub_bass: f32,
/// `bass` smoothed with a fast attack (0.4) and slow release (0.08).
pub smoothed_bass: f32,
/// `low_mids` smoothed with a fast attack (0.4) and slow release (0.08).
pub smoothed_low_mids: f32,
/// `mids` smoothed with a fast attack (0.4) and slow release (0.08).
pub smoothed_mids: f32,
/// `high_mids` smoothed with a fast attack (0.4) and slow release (0.08).
pub smoothed_high_mids: f32,
/// `highs` smoothed with a fast attack (0.4) and slow release (0.08).
pub smoothed_highs: f32,
/// Work buffer the downsampled magnitude spectrum of a frame is written into;
/// it is swapped with `prev_spectrum` once the flux values are computed.
current_spectrum: Vec<f32>,
/// Spectrum the next frame is compared against. After an analysis it holds
/// the spectrum of the frame that was just analyzed.
prev_spectrum: Vec<f32>,
/// Mean positive increase per spectrum bin relative to the previous frame.
pub spectral_flux: f32,
/// Ring buffer of recent `spectral_flux` values.
spectral_flux_history: Vec<f32>,
/// Next write position in `spectral_flux_history`.
flux_history_index: usize,
/// True only on the frame in which an onset fired.
pub onset_detected: bool,
/// Set to 1.0 when an onset fires, then multiplied by 0.88 per analyzed frame.
pub onset_decay: f32,
/// Set to 1.0 when a kick is detected, then multiplied by 0.88 per frame.
pub kick_decay: f32,
/// Set to 1.0 when a snare is detected, then multiplied by 0.84 per frame.
pub snare_decay: f32,
/// Set to 1.0 when a hi-hat is detected, then multiplied by 0.8 per frame.
pub hat_decay: f32,
/// Ring buffer of recent weighted band energies.
energy_history: Vec<f32>,
/// Next write position in `energy_history`.
energy_history_index: usize,
/// Mean of `energy_history`.
pub average_energy: f32,
/// Slow exponential average of the weighted band energy (0.995 / 0.005).
pub long_term_energy: f32,
/// Current energy relative to 1.8x `long_term_energy`, clamped to 0..=2;
/// half the current energy while `long_term_energy` is still near zero.
pub intensity: f32,
/// In-place FFT input/output buffer of `fft_size` complex values.
fft_buffer: Vec<Complex<f32>>,
/// Forward FFT plan, created on first use and cleared by `with_fft_size`.
fft_plan: Option<Arc<dyn Fft<f32>>>,
/// Time of the last analyzed frame in seconds; -1.0 before the first frame.
last_analysis_time: f32,
/// Magnitude-weighted mean frequency divided by the Nyquist frequency.
pub spectral_centroid: f32,
/// Ratio of geometric to arithmetic mean bin magnitude, clamped to 0..=1.
pub spectral_flatness: f32,
/// Bin position derived from the 85% cumulative-energy point, expressed as a
/// fraction of the half-spectrum length.
pub spectral_rolloff: f32,
/// Exponential average of `spectral_centroid` (0.85 / 0.15).
pub smoothed_centroid: f32,
/// Exponential average of `spectral_flatness` (0.9 / 0.1).
pub smoothed_flatness: f32,
/// Exponential average of `spectral_rolloff` (0.9 / 0.1).
pub smoothed_rolloff: f32,
/// True while short-term energy is well below long-term energy.
pub is_breakdown: bool,
/// 1.0 when a breakdown starts; decays slowly (floor 0.3) while it lasts and
/// quickly once it ends.
pub breakdown_intensity: f32,
/// Exponential average of the mean per-band transient (0.75 / 0.25).
pub transient_energy: f32,
/// Slow exponential average of the mean smoothed band level (0.97 / 0.03).
pub sustained_energy: f32,
/// `transient_energy / sustained_energy`, clamped to 0..=2 (0 when the
/// sustained energy is at or below 0.02).
pub transient_ratio: f32,
/// Ring buffer of onset times in seconds. Entries `<= 0.0` are treated as
/// empty by the tempo estimator.
onset_times: Vec<f32>,
/// Next write position in `onset_times`.
onset_times_index: usize,
/// Running tempo estimate in beats per minute; starts at 120.
pub estimated_bpm: f32,
/// Fraction (0..1) of the current beat period elapsed since the last kick.
pub beat_phase: f32,
/// Seconds of analyzed time accumulated since the last detected kick.
pub time_since_last_beat: f32,
/// Share of candidate inter-onset intervals that agree with the chosen one.
pub beat_confidence: f32,
/// Fast exponential average of the weighted band energy (0.94 / 0.06).
section_energy_short: f32,
/// Slow exponential average of the weighted band energy (0.997 / 0.003).
section_energy_long: f32,
/// True while short-term section energy exceeds long-term by more than 20%.
pub is_building: bool,
/// True while energy is more than 50% above long-term with a recent kick and
/// strong bass.
pub is_dropping: bool,
/// Set to 1.0 when a drop starts, then multiplied by 0.98 per frame.
pub drop_intensity: f32,
/// Ramps up by 0.015 per frame while building; decays by 0.96 otherwise.
pub build_intensity: f32,
/// Previous frame's `smoothed_sub_bass + smoothed_bass`.
prev_low_energy: f32,
/// Previous frame's `smoothed_low_mids + smoothed_mids`.
prev_mid_energy: f32,
/// Previous frame's `smoothed_high_mids + smoothed_highs`.
prev_high_energy: f32,
/// Frame-to-frame rise of the low band pair, scaled by 2.5 and capped at 1.
pub low_transient: f32,
/// Frame-to-frame rise of the mid band pair, scaled by 2.5 and capped at 1.
pub mid_transient: f32,
/// Frame-to-frame rise of the high band pair, scaled by 2.5 and capped at 1.
pub high_transient: f32,
/// Combined centroid and flatness movement, clamped to 0..=1.
pub harmonic_change: f32,
/// `spectral_centroid` of the previous frame.
prev_spectral_centroid: f32,
/// `spectral_centroid` minus the previous frame's centroid.
pub brightness_delta: f32,
/// Triangle wave over `beat_phase`: 0 at the beat boundary, 1 at mid-beat.
pub groove_sync: f32,
/// Slow exponential average (0.95 / 0.05) of the same triangle wave computed
/// from `time_since_last_beat`; starts at 0.5.
pub pocket_tightness: f32,
}
impl Default for AudioAnalyzer {
fn default() -> Self {
Self {
samples: Vec::new(),
sample_rate: 44100,
fft_size: FFT_SIZE,
total_duration: 0.0,
sub_bass: 0.0,
bass: 0.0,
low_mids: 0.0,
mids: 0.0,
high_mids: 0.0,
highs: 0.0,
smoothed_sub_bass: 0.0,
smoothed_bass: 0.0,
smoothed_low_mids: 0.0,
smoothed_mids: 0.0,
smoothed_high_mids: 0.0,
smoothed_highs: 0.0,
current_spectrum: vec![0.0; SPECTRUM_BINS],
prev_spectrum: vec![0.0; SPECTRUM_BINS],
spectral_flux: 0.0,
spectral_flux_history: vec![0.0; FLUX_HISTORY_SIZE],
flux_history_index: 0,
onset_detected: false,
onset_decay: 0.0,
kick_decay: 0.0,
snare_decay: 0.0,
hat_decay: 0.0,
energy_history: vec![0.0; ENERGY_HISTORY_SIZE],
energy_history_index: 0,
average_energy: 0.0,
long_term_energy: 0.0,
intensity: 0.0,
fft_buffer: vec![Complex::new(0.0, 0.0); FFT_SIZE],
fft_plan: None,
last_analysis_time: -1.0,
spectral_centroid: 0.0,
spectral_flatness: 0.0,
spectral_rolloff: 0.0,
smoothed_centroid: 0.0,
smoothed_flatness: 0.0,
smoothed_rolloff: 0.0,
is_breakdown: false,
breakdown_intensity: 0.0,
transient_energy: 0.0,
sustained_energy: 0.0,
transient_ratio: 0.0,
onset_times: vec![0.0; ONSET_HISTORY_SIZE],
onset_times_index: 0,
estimated_bpm: 120.0,
beat_phase: 0.0,
time_since_last_beat: 0.0,
beat_confidence: 0.0,
section_energy_short: 0.0,
section_energy_long: 0.0,
is_building: false,
is_dropping: false,
drop_intensity: 0.0,
build_intensity: 0.0,
prev_low_energy: 0.0,
prev_mid_energy: 0.0,
prev_high_energy: 0.0,
low_transient: 0.0,
mid_transient: 0.0,
high_transient: 0.0,
harmonic_change: 0.0,
prev_spectral_centroid: 0.0,
brightness_delta: 0.0,
groove_sync: 0.0,
pocket_tightness: 0.5,
}
}
}
impl AudioAnalyzer {
pub fn new() -> Self {
Self::default()
}
pub fn with_sample_rate(mut self, sample_rate: u32) -> Self {
self.sample_rate = sample_rate;
self
}
pub fn with_fft_size(mut self, fft_size: usize) -> Self {
self.fft_size = fft_size;
self.fft_buffer = vec![Complex::new(0.0, 0.0); fft_size];
self.fft_plan = None;
self
}
/// Replaces the audio to analyze.
///
/// Stores `samples` and `sample_rate`, recomputes the total duration as
/// `samples.len() / sample_rate` seconds, and resets all analysis state.
pub fn load_samples(&mut self, samples: Vec<f32>, sample_rate: u32) {
    let duration_seconds = samples.len() as f32 / sample_rate as f32;
    self.total_duration = duration_seconds;
    self.sample_rate = sample_rate;
    self.samples = samples;
    self.reset();
}
pub fn total_duration(&self) -> f32 {
self.total_duration
}
pub fn sample_rate(&self) -> u32 {
self.sample_rate
}
/// Returns `true` once a non-empty sample buffer is available.
pub fn has_samples(&self) -> bool {
    let nothing_loaded = self.samples.is_empty();
    !nothing_loaded
}
/// Read-only view of the loaded samples.
pub fn samples(&self) -> &[f32] {
    self.samples.as_slice()
}
/// Read-only view of the stored comparison spectrum. After an analysis this
/// is the downsampled magnitude spectrum of the frame just analyzed.
pub fn prev_spectrum(&self) -> &[f32] {
    self.prev_spectrum.as_slice()
}
pub fn fft_size(&self) -> usize {
self.fft_size
}
/// Position of `time_seconds` within the loaded audio as a fraction in
/// `0.0..=1.0`.
///
/// Returns 0.0 when the total duration is not positive (for example before
/// any samples are loaded).
pub fn song_progress(&self, time_seconds: f32) -> f32 {
    let duration = self.total_duration;
    if duration > 0.0 {
        let fraction = time_seconds / duration;
        fraction.clamp(0.0, 1.0)
    } else {
        0.0
    }
}
/// Returns all analysis state to its initial values.
///
/// The loaded samples, sample rate, FFT size, FFT buffer and cached FFT plan
/// are left untouched, so analysis can restart on the same audio.
pub fn reset(&mut self) {
    // Every scalar feature that rests at zero.
    for value in [
        &mut self.sub_bass,
        &mut self.bass,
        &mut self.low_mids,
        &mut self.mids,
        &mut self.high_mids,
        &mut self.highs,
        &mut self.smoothed_sub_bass,
        &mut self.smoothed_bass,
        &mut self.smoothed_low_mids,
        &mut self.smoothed_mids,
        &mut self.smoothed_high_mids,
        &mut self.smoothed_highs,
        &mut self.spectral_flux,
        &mut self.onset_decay,
        &mut self.kick_decay,
        &mut self.snare_decay,
        &mut self.hat_decay,
        &mut self.average_energy,
        &mut self.long_term_energy,
        &mut self.intensity,
        &mut self.spectral_centroid,
        &mut self.spectral_flatness,
        &mut self.spectral_rolloff,
        &mut self.smoothed_centroid,
        &mut self.smoothed_flatness,
        &mut self.smoothed_rolloff,
        &mut self.breakdown_intensity,
        &mut self.transient_energy,
        &mut self.sustained_energy,
        &mut self.transient_ratio,
        &mut self.beat_phase,
        &mut self.time_since_last_beat,
        &mut self.beat_confidence,
        &mut self.section_energy_short,
        &mut self.section_energy_long,
        &mut self.drop_intensity,
        &mut self.build_intensity,
        &mut self.prev_low_energy,
        &mut self.prev_mid_energy,
        &mut self.prev_high_energy,
        &mut self.low_transient,
        &mut self.mid_transient,
        &mut self.high_transient,
        &mut self.harmonic_change,
        &mut self.prev_spectral_centroid,
        &mut self.brightness_delta,
        &mut self.groove_sync,
    ] {
        *value = 0.0;
    }

    // History buffers keep their length and are zero-filled in place.
    for history in [
        &mut self.current_spectrum,
        &mut self.prev_spectrum,
        &mut self.spectral_flux_history,
        &mut self.energy_history,
        &mut self.onset_times,
    ] {
        history.fill(0.0);
    }

    // Ring-buffer write cursors.
    for cursor in [
        &mut self.flux_history_index,
        &mut self.energy_history_index,
        &mut self.onset_times_index,
    ] {
        *cursor = 0;
    }

    // Detection and section flags.
    for flag in [
        &mut self.onset_detected,
        &mut self.is_breakdown,
        &mut self.is_building,
        &mut self.is_dropping,
    ] {
        *flag = false;
    }

    // Fields whose resting value is not zero.
    self.last_analysis_time = -1.0;
    self.estimated_bpm = 120.0;
    self.pocket_tightness = 0.5;
}
/// Analyzes the window of `fft_size` samples that starts at `time_seconds`
/// and refreshes every feature field on the analyzer.
///
/// The call does nothing when:
/// * no samples are loaded or the FFT size is below 2,
/// * the window would extend past the end of the samples, or
/// * less than 8 ms separate `time_seconds` from the last analyzed time.
///
/// Smoothing and decay factors are applied once per analyzed frame, so the
/// time constants of the outputs depend on how often this method is called.
pub fn analyze_at_time(&mut self, time_seconds: f32) {
    // With fewer than two samples per window there is no usable frequency bin
    // and the bin arithmetic below would underflow or divide by zero.
    if self.samples.is_empty() || self.fft_size < 2 {
        return;
    }
    // The float-to-usize cast saturates (negative/NaN -> 0, huge -> usize::MAX),
    // so the end of the window is computed with overflow checking.
    let sample_position = (time_seconds * self.sample_rate as f32) as usize;
    let window_end = match sample_position.checked_add(self.fft_size) {
        Some(end) if end <= self.samples.len() => end,
        _ => return,
    };
    let delta_time = time_seconds - self.last_analysis_time;
    if delta_time.abs() < 0.008 {
        return;
    }
    self.last_analysis_time = time_seconds;

    // ---- Hann-windowed forward FFT -------------------------------------
    let pi = std::f32::consts::PI;
    let fft_len = self.fft_size as f32;
    let window_samples = &self.samples[sample_position..window_end];
    for (fft_index, (fft_sample, &sample)) in
        self.fft_buffer.iter_mut().zip(window_samples).enumerate()
    {
        let window = 0.5 - 0.5 * (2.0 * pi * fft_index as f32 / fft_len).cos();
        *fft_sample = Complex::new(sample * window, 0.0);
    }
    // The plan is built lazily and reused for every later frame.
    let fft = self
        .fft_plan
        .get_or_insert_with(|| {
            let mut planner = FftPlanner::new();
            planner.plan_fft_forward(self.fft_size)
        })
        .clone();
    fft.process(&mut self.fft_buffer);

    // ---- Band levels -----------------------------------------------------
    let freq_resolution = self.sample_rate as f32 / fft_len;
    let half_fft = self.fft_size / 2;
    let sub_bass_start = (20.0 / freq_resolution) as usize;
    let sub_bass_end = (60.0 / freq_resolution) as usize;
    let bass_end = (250.0 / freq_resolution) as usize;
    let low_mids_end = (500.0 / freq_resolution) as usize;
    let mids_end = (2000.0 / freq_resolution) as usize;
    let high_mids_end = (4000.0 / freq_resolution) as usize;
    let highs_end = (12000.0 / freq_resolution) as usize;
    // RMS magnitude of bins `start..end`, skipping DC and staying below
    // Nyquist, scaled by the FFT length.
    let band_rms = |buffer: &[Complex<f32>], start: usize, end: usize| -> f32 {
        let start = start.max(1).min(half_fft);
        let end = end.min(half_fft);
        if start >= end {
            return 0.0;
        }
        let sum: f32 = buffer[start..end].iter().map(|c| c.norm_sqr()).sum();
        (sum / (end - start) as f32).sqrt() / fft_len
    };
    let raw_sub_bass = band_rms(&self.fft_buffer, sub_bass_start, sub_bass_end);
    let raw_bass = band_rms(&self.fft_buffer, sub_bass_end, bass_end);
    let raw_low_mids = band_rms(&self.fft_buffer, bass_end, low_mids_end);
    let raw_mids = band_rms(&self.fft_buffer, low_mids_end, mids_end);
    let raw_high_mids = band_rms(&self.fft_buffer, mids_end, high_mids_end);
    let raw_highs = band_rms(&self.fft_buffer, high_mids_end, highs_end);
    // Maps an amplitude to 0..=1 over the dB range `floor..ceiling`.
    let to_normalized = |amplitude: f32, floor: f32, ceiling: f32| -> f32 {
        let db = 20.0 * (amplitude + 1e-10).log10();
        ((db - floor) / (ceiling - floor)).clamp(0.0, 1.0)
    };
    self.sub_bass = to_normalized(raw_sub_bass, -75.0, -25.0);
    self.bass = to_normalized(raw_bass, -70.0, -25.0);
    self.low_mids = to_normalized(raw_low_mids, -65.0, -25.0);
    self.mids = to_normalized(raw_mids, -60.0, -20.0);
    self.high_mids = to_normalized(raw_high_mids, -60.0, -20.0);
    self.highs = to_normalized(raw_highs, -65.0, -25.0);
    // Rise quickly, fall slowly.
    let attack = 0.4;
    let release = 0.08;
    let smooth = |current: f32, target: f32| -> f32 {
        let factor = if target > current { attack } else { release };
        current + (target - current) * factor
    };
    self.smoothed_sub_bass = smooth(self.smoothed_sub_bass, self.sub_bass);
    self.smoothed_bass = smooth(self.smoothed_bass, self.bass);
    self.smoothed_low_mids = smooth(self.smoothed_low_mids, self.low_mids);
    self.smoothed_mids = smooth(self.smoothed_mids, self.mids);
    self.smoothed_high_mids = smooth(self.smoothed_high_mids, self.high_mids);
    self.smoothed_highs = smooth(self.smoothed_highs, self.highs);

    // ---- Spectral shape: centroid, flatness, roll-off --------------------
    let total_energy_target = self.fft_buffer[1..half_fft]
        .iter()
        .fold(0.0_f32, |total, bin| total + bin.norm_sqr())
        * 0.85;
    let mut weighted_freq_sum = 0.0_f32;
    let mut magnitude_sum = 0.0_f32;
    let mut geometric_sum = 0.0_f32;
    let mut non_zero_bin_count = 0_usize;
    let mut cumulative_energy = 0.0_f32;
    // Highest bin whose cumulative energy is still below 85% of the total.
    // Starting at 0 keeps a silent frame, or one whose first bin already
    // reaches the threshold, from being reported as maximally bright.
    let mut rolloff_bin = 0_usize;
    for (bin_index, bin) in self.fft_buffer.iter().enumerate().take(half_fft).skip(1) {
        let magnitude = bin.norm();
        let frequency = bin_index as f32 * freq_resolution;
        weighted_freq_sum += frequency * magnitude;
        magnitude_sum += magnitude;
        if magnitude > 1e-10 {
            geometric_sum += magnitude.ln();
            non_zero_bin_count += 1;
        }
        cumulative_energy += bin.norm_sqr();
        if cumulative_energy < total_energy_target {
            rolloff_bin = bin_index;
        }
    }
    let bin_count = (half_fft - 1) as f32;
    self.spectral_centroid = if magnitude_sum > 1e-10 {
        (weighted_freq_sum / magnitude_sum) / (self.sample_rate as f32 / 2.0)
    } else {
        0.0
    };
    let geometric_mean = if non_zero_bin_count > 0 {
        (geometric_sum / non_zero_bin_count as f32).exp()
    } else {
        0.0
    };
    let arithmetic_mean = magnitude_sum / bin_count;
    self.spectral_flatness = if arithmetic_mean > 1e-10 {
        (geometric_mean / arithmetic_mean).clamp(0.0, 1.0)
    } else {
        0.0
    };
    self.spectral_rolloff = rolloff_bin as f32 / half_fft as f32;
    self.smoothed_centroid = self.smoothed_centroid * 0.85 + self.spectral_centroid * 0.15;
    self.smoothed_flatness = self.smoothed_flatness * 0.9 + self.spectral_flatness * 0.1;
    self.brightness_delta = self.spectral_centroid - self.prev_spectral_centroid;
    self.prev_spectral_centroid = self.spectral_centroid;

    // ---- Downsampled spectrum and spectral flux ---------------------------
    let num_bins = SPECTRUM_BINS.min(half_fft);
    let bins_per_band = (half_fft / num_bins).max(1);
    for spectrum_index in 0..num_bins {
        let start = spectrum_index * bins_per_band + 1;
        let end = (start + bins_per_band).min(half_fft);
        let sum: f32 = self.fft_buffer[start..end].iter().map(|c| c.norm()).sum();
        self.current_spectrum[spectrum_index] = sum / bins_per_band as f32;
    }
    // Sum of the positive bin-wise increases from `previous` to `current`.
    fn rising_flux(current: &[f32], previous: &[f32]) -> f32 {
        current
            .iter()
            .zip(previous)
            .map(|(now, before)| now - before)
            .filter(|diff| *diff > 0.0)
            .fold(0.0_f32, |total, diff| total + diff)
    }
    let kick_end = num_bins / 8;
    let snare_start = num_bins / 6;
    let snare_end = num_bins / 2;
    let hat_flux_start = (num_bins as f32 * 0.6) as usize;
    let kick_flux = rising_flux(
        &self.current_spectrum[..kick_end],
        &self.prev_spectrum[..kick_end],
    );
    let snare_flux = rising_flux(
        &self.current_spectrum[snare_start..snare_end],
        &self.prev_spectrum[snare_start..snare_end],
    );
    let hat_flux = rising_flux(
        &self.current_spectrum[hat_flux_start..num_bins],
        &self.prev_spectrum[hat_flux_start..num_bins],
    );
    let total_flux = rising_flux(
        &self.current_spectrum[..num_bins],
        &self.prev_spectrum[..num_bins],
    );
    self.spectral_flux = total_flux / num_bins as f32;
    // The spectrum just computed becomes the reference for the next frame.
    std::mem::swap(&mut self.current_spectrum, &mut self.prev_spectrum);

    // ---- Transient versus sustained energy --------------------------------
    let low_energy = self.smoothed_sub_bass + self.smoothed_bass;
    let mid_energy = self.smoothed_low_mids + self.smoothed_mids;
    let high_energy = self.smoothed_high_mids + self.smoothed_highs;
    self.low_transient = ((low_energy - self.prev_low_energy).max(0.0) * 2.5).min(1.0);
    self.mid_transient = ((mid_energy - self.prev_mid_energy).max(0.0) * 2.5).min(1.0);
    self.high_transient = ((high_energy - self.prev_high_energy).max(0.0) * 2.5).min(1.0);
    self.prev_low_energy = low_energy;
    self.prev_mid_energy = mid_energy;
    self.prev_high_energy = high_energy;
    let instant_transient =
        (self.low_transient + self.mid_transient + self.high_transient) / 3.0;
    self.transient_energy = self.transient_energy * 0.75 + instant_transient * 0.25;
    let instant_sustained = (low_energy + mid_energy + high_energy) / 6.0;
    self.sustained_energy = self.sustained_energy * 0.97 + instant_sustained * 0.03;
    self.transient_ratio = if self.sustained_energy > 0.02 {
        (self.transient_energy / self.sustained_energy).clamp(0.0, 2.0)
    } else {
        0.0
    };

    // ---- Onset detection (adaptive flux threshold) ------------------------
    self.spectral_flux_history[self.flux_history_index] = self.spectral_flux;
    self.flux_history_index = (self.flux_history_index + 1) % FLUX_HISTORY_SIZE;
    let flux_mean: f32 =
        self.spectral_flux_history.iter().sum::<f32>() / FLUX_HISTORY_SIZE as f32;
    let flux_variance: f32 = self
        .spectral_flux_history
        .iter()
        .map(|f| (f - flux_mean).powi(2))
        .sum::<f32>()
        / FLUX_HISTORY_SIZE as f32;
    let flux_std = flux_variance.sqrt();
    let flux_threshold = flux_mean + flux_std * 1.5;
    let onset_triggered = self.spectral_flux > flux_threshold && self.spectral_flux > 0.004;
    // The decay value doubles as a refractory period between onsets.
    if onset_triggered && self.onset_decay < 0.3 {
        self.onset_detected = true;
        self.onset_decay = 1.0;
        self.onset_times[self.onset_times_index] = time_seconds;
        self.onset_times_index = (self.onset_times_index + 1) % ONSET_HISTORY_SIZE;
        self.update_tempo_estimation();
    } else {
        self.onset_detected = false;
        self.onset_decay *= 0.88;
    }

    // ---- Drum triggers ----------------------------------------------------
    let kick_threshold = 0.02 + self.long_term_energy * 0.025;
    let kick_triggered =
        kick_flux > kick_threshold && self.smoothed_sub_bass > 0.3 && self.low_transient > 0.2;
    if kick_triggered && self.kick_decay < 0.2 {
        self.kick_decay = 1.0;
        // Kicks are the beat reference for phase tracking.
        self.time_since_last_beat = 0.0;
    } else {
        self.kick_decay *= 0.88;
    }
    let snare_threshold = 0.015 + self.long_term_energy * 0.02;
    let snare_triggered =
        snare_flux > snare_threshold && self.smoothed_mids > 0.25 && self.mid_transient > 0.15;
    if snare_triggered && self.snare_decay < 0.2 {
        self.snare_decay = 1.0;
    } else {
        self.snare_decay *= 0.84;
    }
    let hat_threshold = 0.012 + self.long_term_energy * 0.015;
    let hat_triggered =
        hat_flux > hat_threshold && self.smoothed_highs > 0.2 && self.high_transient > 0.12;
    if hat_triggered && self.hat_decay < 0.15 {
        self.hat_decay = 1.0;
    } else {
        self.hat_decay *= 0.8;
    }

    // ---- Beat phase -------------------------------------------------------
    // Backwards seeks (negative delta) do not rewind the beat clock.
    self.time_since_last_beat += delta_time.max(0.0);
    if self.estimated_bpm > 0.0 {
        let beat_period = 60.0 / self.estimated_bpm;
        self.beat_phase = (self.time_since_last_beat % beat_period) / beat_period;
        self.groove_sync = 1.0 - (self.beat_phase * 2.0 - 1.0).abs();
    }

    // ---- Overall energy and intensity -------------------------------------
    let current_energy = self.smoothed_sub_bass * 0.15
        + self.smoothed_bass * 0.25
        + self.smoothed_low_mids * 0.2
        + self.smoothed_mids * 0.2
        + self.smoothed_high_mids * 0.12
        + self.smoothed_highs * 0.08;
    self.energy_history[self.energy_history_index] = current_energy;
    self.energy_history_index = (self.energy_history_index + 1) % ENERGY_HISTORY_SIZE;
    self.average_energy = self.energy_history.iter().sum::<f32>() / ENERGY_HISTORY_SIZE as f32;
    self.long_term_energy = self.long_term_energy * 0.995 + current_energy * 0.005;
    self.intensity = if self.long_term_energy > 0.02 {
        (current_energy / (self.long_term_energy * 1.8)).clamp(0.0, 2.0)
    } else {
        current_energy * 0.5
    };

    // ---- Section state: build / drop / breakdown --------------------------
    self.section_energy_short = self.section_energy_short * 0.94 + current_energy * 0.06;
    self.section_energy_long = self.section_energy_long * 0.997 + current_energy * 0.003;
    let energy_ratio = if self.section_energy_long > 0.02 {
        self.section_energy_short / self.section_energy_long
    } else {
        1.0
    };
    let prev_building = self.is_building;
    let prev_dropping = self.is_dropping;
    let prev_breakdown = self.is_breakdown;
    self.is_building = energy_ratio > 1.2 && current_energy > self.average_energy * 0.8;
    let drop_kick_recent = self.kick_decay > 0.5;
    self.is_dropping = energy_ratio > 1.5 && drop_kick_recent && self.smoothed_bass > 0.4;
    self.is_breakdown = energy_ratio < 0.6 && current_energy < self.long_term_energy * 0.5;
    if self.is_building && !prev_building {
        self.build_intensity = 0.0;
    }
    if self.is_building {
        self.build_intensity = (self.build_intensity + 0.015).min(1.0);
    } else {
        self.build_intensity *= 0.96;
    }
    if self.is_dropping && !prev_dropping {
        self.drop_intensity = 1.0;
    } else {
        self.drop_intensity *= 0.98;
    }
    if self.is_breakdown && !prev_breakdown {
        self.breakdown_intensity = 1.0;
    } else if self.is_breakdown {
        self.breakdown_intensity = (self.breakdown_intensity * 0.99).max(0.3);
    } else {
        self.breakdown_intensity *= 0.92;
    }

    // ---- Remaining smoothed descriptors -----------------------------------
    self.smoothed_rolloff = self.smoothed_rolloff * 0.9 + self.spectral_rolloff * 0.1;
    self.harmonic_change = (self.brightness_delta.abs() * 3.0
        + (self.spectral_flatness - self.smoothed_flatness).abs() * 2.0)
        .clamp(0.0, 1.0);
    let expected_beat_variance = if self.estimated_bpm > 0.0 {
        let beat_period = 60.0 / self.estimated_bpm;
        let normalized_time = self.time_since_last_beat / beat_period;
        let phase_error = (normalized_time.fract() - 0.5).abs();
        1.0 - phase_error * 2.0
    } else {
        0.5
    };
    self.pocket_tightness = self.pocket_tightness * 0.95 + expected_beat_variance * 0.05;
}
fn update_tempo_estimation(&mut self) {
let mut valid_intervals = Vec::new();
let min_interval = 60.0 / 200.0;
let max_interval = 1.0;
for index in 0..ONSET_HISTORY_SIZE {
let current_time = self.onset_times[index];
if current_time <= 0.0 {
continue;
}
for other_index in (index + 1)..ONSET_HISTORY_SIZE {
let other_time = self.onset_times[other_index];
if other_time <= 0.0 {
continue;
}
let interval = (other_time - current_time).abs();
if interval >= min_interval && interval <= max_interval {
valid_intervals.push(interval);
}
let half_interval = interval / 2.0;
if half_interval >= min_interval && half_interval <= max_interval {
valid_intervals.push(half_interval);
}
let double_interval = interval * 2.0;
if double_interval >= min_interval && double_interval <= max_interval {
valid_intervals.push(double_interval);
}
}
}
if valid_intervals.len() < 4 {
return;
}
valid_intervals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let mut best_interval = 0.0_f32;
let mut best_count = 0_usize;
let tolerance = 0.025;
for &interval in &valid_intervals {
let count = valid_intervals
.iter()
.filter(|&&other| (other - interval).abs() < tolerance)
.count();
if count > best_count {
best_count = count;
best_interval = interval;
}
}
if best_count >= 3 && best_interval > 0.0 {
let new_bpm = 60.0 / best_interval;
let clamped_bpm = new_bpm.clamp(60.0, 200.0);
self.beat_confidence = (best_count as f32 / valid_intervals.len() as f32).min(1.0);
let blend = 0.15 * self.beat_confidence;
self.estimated_bpm = self.estimated_bpm * (1.0 - blend) + clamped_bpm * blend;
}
}
}