use rustfft::num_complex::Complex;
use audio_processor_traits::simple_processor::MonoAudioProcessor;
use audio_processor_traits::{AudioBuffer, AudioContext};
use dynamic_thresholds::{DynamicThresholds, DynamicThresholdsParams};
use power_change::{PowerOfChangeFrames, PowerOfChangeParams};
use crate::fft_processor::{FftDirection, FftProcessor, FftProcessorOptions};
use crate::window_functions::WindowFunctionType;
mod dynamic_thresholds;
mod frame_deltas;
mod power_change;
pub mod markers;
#[cfg(any(test, feature = "visualization"))]
pub mod visualization;
/// Tuning parameters consumed by [`find_transients`].
#[derive(Debug, Clone)]
pub struct IterativeTransientDetectionParams {
/// Size of each FFT frame. Used for the forward analysis and for the inverse
/// transform that rebuilds the output; the first `fft_size` samples of the
/// rebuilt signal are dropped from the returned vector.
pub fft_size: usize,
/// Overlap ratio handed to the forward FFT processor. The output is
/// overlap-added with a hop of `frame_len * (1.0 - fft_overlap_ratio)` samples.
pub fft_overlap_ratio: f32,
/// Forwarded as `PowerOfChangeParams::spectral_spread_bins`.
pub power_of_change_spectral_spread: usize,
/// Forwarded as `DynamicThresholdsParams::threshold_time_spread`.
pub threshold_time_spread: usize,
/// Forwarded as `DynamicThresholdsParams::threshold_time_spread_factor`.
pub threshold_time_spread_factor: f32,
/// Minimum number of bins in a frame whose power-of-change value must exceed
/// its dynamic threshold for that frame to be updated in an iteration.
pub frequency_bin_change_threshold: usize,
/// Fraction of a selected frame's current magnitude that is added to the
/// transient estimate on each iteration.
pub iteration_magnitude_factor: f32,
/// Number of analysis/update iterations to run.
pub iteration_count: usize,
}
impl Default for IterativeTransientDetectionParams {
    /// Builds the default parameter set: a 2048-point FFT with 75% overlap,
    /// a bin-change threshold of a quarter of the FFT size, and 20 iterations.
    fn default() -> Self {
        const DEFAULT_FFT_SIZE: usize = 2048;

        Self {
            fft_size: DEFAULT_FFT_SIZE,
            fft_overlap_ratio: 0.75,
            power_of_change_spectral_spread: 3,
            threshold_time_spread: 2,
            threshold_time_spread_factor: 2.0,
            // A quarter of the FFT size (512 bins for the default size).
            frequency_bin_change_threshold: DEFAULT_FFT_SIZE / 4,
            iteration_magnitude_factor: 0.05,
            iteration_count: 20,
        }
    }
}
/// Runs the iterative transient detection over `data` and returns the
/// reconstructed transient signal.
///
/// Steps, in order:
/// 1. `data` is run through the forward FFT processor to collect frames.
/// 2. Per-bin magnitudes are taken and a zeroed transient estimate of the same
///    shape is created.
/// 3. For `params.iteration_count` iterations: frame deltas, power of change
///    and dynamic thresholds are computed, the bins above threshold are
///    counted per frame, and frames reaching
///    `params.frequency_bin_change_threshold` have magnitude moved into the
///    transient estimate.
/// 4. The transient estimate is combined with the original phases and
///    inverse-transformed into the output.
///
/// # Side effects
/// `data` is modified: every channel of every sample is overwritten with the
/// FFT processor's per-sample output while the frames are collected.
///
/// # Returns
/// The peak-normalised transient signal with its first `params.fft_size`
/// samples removed.
pub fn find_transients(
    params: IterativeTransientDetectionParams,
    data: &mut AudioBuffer<f32>,
) -> Vec<f32> {
    log::info!("Performing FFT...");
    let fft_frames = get_fft_frames(params.fft_size, params.fft_overlap_ratio, data);

    log::info!("Finding base function values");
    let mut magnitude_frames: Vec<Vec<f32>> = get_magnitudes(&fft_frames);
    let mut transient_magnitude_frames: Vec<Vec<f32>> =
        initialize_result_transient_magnitude_frames(&mut magnitude_frames);

    for _ in 0..params.iteration_count {
        let deltas = frame_deltas::calculate_deltas(&magnitude_frames);

        let power_of_change = power_change::calculate_power_of_change(
            PowerOfChangeParams {
                spectral_spread_bins: params.power_of_change_spectral_spread,
            },
            &deltas,
        );

        let thresholds = dynamic_thresholds::calculate_dynamic_thresholds(
            DynamicThresholdsParams {
                threshold_time_spread: params.threshold_time_spread,
                threshold_time_spread_factor: params.threshold_time_spread_factor,
            },
            &power_of_change,
        );

        let changed_bins_per_frame: Vec<usize> =
            count_changed_bins_per_frame(power_of_change, thresholds);

        update_output_and_magnitudes(
            params.iteration_magnitude_factor,
            params.frequency_bin_change_threshold,
            changed_bins_per_frame,
            &mut magnitude_frames,
            &mut transient_magnitude_frames,
        );
    }

    generate_output_frames(
        params.fft_size,
        params.fft_overlap_ratio,
        data,
        &fft_frames,
        &mut transient_magnitude_frames,
    )
}
/// Moves magnitude from `magnitude_frames` into `transient_magnitude_frames`
/// for every frame whose changed-bin count reaches the threshold.
///
/// For each frame `i` with
/// `num_changed_bins_frames[i] >= frequency_bin_change_threshold`, every bin
/// `j` is updated as:
///
/// * `transient[i][j] += iteration_magnitude_factor * magnitude[i][j]`
/// * `magnitude[i][j] -= (1.0 - iteration_magnitude_factor) * magnitude[i][j]`
///
/// Frames below the threshold are left untouched.
///
/// NOTE(review): after the update the residual magnitude is
/// `iteration_magnitude_factor * magnitude`, not
/// `(1.0 - iteration_magnitude_factor) * magnitude` — confirm this is the
/// intended split between transient and residual.
///
/// # Panics
/// Panics if a non-empty transient frame has no matching entry in
/// `num_changed_bins_frames`, or if a selected frame has no matching frame/bin
/// in `magnitude_frames`.
fn update_output_and_magnitudes(
    iteration_magnitude_factor: f32,
    frequency_bin_change_threshold: usize,
    num_changed_bins_frames: Vec<usize>,
    magnitude_frames: &mut [Vec<f32>],
    transient_magnitude_frames: &mut [Vec<f32>],
) {
    for (frame_index, transient_frame) in transient_magnitude_frames.iter_mut().enumerate() {
        // A frame without bins has nothing to update and never consults the
        // changed-bin counts.
        if transient_frame.is_empty() {
            continue;
        }
        if num_changed_bins_frames[frame_index] < frequency_bin_change_threshold {
            continue;
        }

        let magnitude_frame = &mut magnitude_frames[frame_index];
        for (bin_index, transient_magnitude) in transient_frame.iter_mut().enumerate() {
            let magnitude = magnitude_frame[bin_index];
            *transient_magnitude += iteration_magnitude_factor * magnitude;
            magnitude_frame[bin_index] =
                magnitude - (1.0 - iteration_magnitude_factor) * magnitude;
        }
    }
}
/// Counts, for every frame, how many bins have a power-of-change value
/// strictly greater than the dynamic threshold for the same bin.
///
/// Frames and bins are paired positionally; pairing stops at the shorter of
/// the two inputs at both levels. The result has one count per paired frame.
fn count_changed_bins_per_frame(
    f_frames: PowerOfChangeFrames,
    threshold_frames: DynamicThresholds,
) -> Vec<usize> {
    let mut changed_counts: Vec<usize> = Vec::new();

    for (threshold_frame, f_frame) in threshold_frames.buffer.iter().zip(f_frames.buffer) {
        let mut changed: usize = 0;
        for (threshold, f) in threshold_frame.iter().zip(f_frame) {
            if f > *threshold {
                changed += 1;
            }
        }
        changed_counts.push(changed);
    }

    changed_counts
}
/// Rebuilds a time-domain signal from the transient magnitudes and the phases
/// of the original FFT frames.
///
/// Each frame is turned back into complex bins (transient magnitude, original
/// phase), inverse-transformed, and its real part is overlap-added into an
/// output buffer of `data.num_samples()` samples. The write position advances
/// by `frame_len * (1.0 - fft_overlap_ratio)` samples per frame; samples that
/// would land past the end of the buffer are discarded.
///
/// The buffer is then gated and normalised against its absolute peak: samples
/// whose magnitude is above 5% of the peak are divided by the peak, all others
/// are zeroed. Finally the first `fft_size` samples are dropped.
///
/// # Panics
/// Panics if `transient_magnitude_frames` has fewer frames than `fft_frames`.
fn generate_output_frames(
    fft_size: usize,
    fft_overlap_ratio: f32,
    data: &mut AudioBuffer<f32>,
    fft_frames: &[Vec<Complex<f32>>],
    transient_magnitude_frames: &mut [Vec<f32>],
) -> Vec<f32> {
    let mut planner = rustfft::FftPlanner::<f32>::new();
    let inverse_fft = planner.plan_fft(fft_size, FftDirection::Inverse);
    let mut scratch: Vec<Complex<f32>> =
        vec![Complex::new(0.0, 0.0); inverse_fft.get_inplace_scratch_len()];

    let mut output: Vec<f32> = vec![0.0; data.num_samples()];
    let mut write_position = 0;

    for (frame_index, frame) in fft_frames.iter().enumerate() {
        let transient_magnitudes = &transient_magnitude_frames[frame_index];

        // Transient magnitude combined with the phase of the analysed signal.
        let mut bins: Vec<Complex<f32>> = Vec::with_capacity(frame.len());
        for (original_bin, magnitude) in frame.iter().zip(transient_magnitudes) {
            bins.push(Complex::from_polar(*magnitude, original_bin.arg()));
        }

        inverse_fft.process_with_scratch(&mut bins, &mut scratch);

        // Overlap-add; anything that would fall past the end is dropped.
        for (out_sample, bin) in output.iter_mut().skip(write_position).zip(&bins) {
            *out_sample += bin.re;
        }

        write_position += (frame.len() as f32 * (1.0 - fft_overlap_ratio)) as usize;
    }

    let peak = output
        .iter()
        .map(|sample| sample.abs())
        .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
        .unwrap_or(0.0);
    let gate_level = peak * 0.05;

    for sample in output.iter_mut() {
        *sample = if sample.abs() > gate_level {
            *sample / peak
        } else {
            0.0
        };
    }

    output.into_iter().skip(fft_size).collect()
}
/// Creates a zero-filled set of frames with the same shape as `magnitudes`
/// (same number of frames, same number of bins per frame).
///
/// `magnitudes` is only read; it is not modified.
fn initialize_result_transient_magnitude_frames(magnitudes: &mut [Vec<f32>]) -> Vec<Vec<f32>> {
    let mut zeroed_frames = Vec::with_capacity(magnitudes.len());
    for frame in magnitudes.iter() {
        zeroed_frames.push(vec![0.0; frame.len()]);
    }
    zeroed_frames
}
/// Computes the norm of every complex bin, preserving the frame/bin layout of
/// `fft_frames`.
fn get_magnitudes(fft_frames: &[Vec<Complex<f32>>]) -> Vec<Vec<f32>> {
    let mut magnitude_frames = Vec::with_capacity(fft_frames.len());
    for frame in fft_frames {
        let mut magnitudes = Vec::with_capacity(frame.len());
        for bin in frame {
            magnitudes.push(bin.norm());
        }
        magnitude_frames.push(magnitudes);
    }
    magnitude_frames
}
/// Feeds `data` sample by sample through a forward, Hann-windowed
/// `FftProcessor` and collects a copy of the processor's buffer every time it
/// reports a change.
///
/// For each sample index the values of all channels are summed (not averaged)
/// into one input sample.
///
/// # Side effects
/// Every channel of `data` is overwritten, sample by sample, with the value
/// returned by the processor for that sample.
fn get_fft_frames(
    fft_size: usize,
    fft_overlap_ratio: f32,
    data: &mut AudioBuffer<f32>,
) -> Vec<Vec<Complex<f32>>> {
    let options = FftProcessorOptions {
        size: fft_size,
        direction: FftDirection::Forward,
        overlap_ratio: fft_overlap_ratio,
        window_function: WindowFunctionType::Hann,
    };
    let mut processor = FftProcessor::new(options);
    let mut context = AudioContext::default();
    let mut collected_frames = Vec::new();

    for sample_index in 0..data.num_samples() {
        let channel_count = data.num_channels();

        // Sum of all channels at this sample position.
        let mut mixed_sample = 0.0;
        for channel_index in 0..channel_count {
            mixed_sample += data.get(channel_index, sample_index);
        }

        let processed_sample = processor.m_process(&mut context, mixed_sample);
        for channel_index in 0..channel_count {
            data.set(channel_index, sample_index, processed_sample);
        }

        // Snapshot the processor's buffer whenever it reports a change.
        if processor.has_changed() {
            collected_frames.push(processor.buffer().clone());
        }
    }

    collected_frames
}
// Integration test for the transient detector. It reads an audio file from
// disk, runs `find_transients` on it and writes a plot and a WAV file.
#[cfg(test)]
mod test {
use audio_processor_testing_helpers::relative_path;
use audio_processor_file::{AudioFileProcessor, OutputAudioFileProcessor};
use audio_processor_traits::{AudioProcessor, AudioProcessorSettings};
use super::*;
/// Loads `input_file_path` and returns a single-channel buffer holding the
/// sum of all input channels, limited to `sample_rate * 10.0` samples.
///
/// Panics if the file cannot be opened (`unwrap` on `from_path`).
fn read_input_file(input_file_path: &str) -> AudioBuffer<f32> {
log::info!("Reading input file input_file={}", input_file_path);
let settings = AudioProcessorSettings::default();
let mut input = AudioFileProcessor::from_path(
audio_garbage_collector::handle(),
settings,
input_file_path,
)
.unwrap();
let mut context = AudioContext::from(settings);
input.prepare(&mut context);
let input_buffer = input.buffer();
let mut buffer = AudioBuffer::empty();
// Cap on the number of samples kept from the input.
let max_len = (settings.sample_rate() * 10.0) as usize;
// NOTE(review): the accumulation below assumes `resize` leaves the new
// samples at zero — confirm against `AudioBuffer::resize`.
buffer.resize(1, input_buffer[0].len().min(max_len));
// Mix down: add every input channel into channel 0 of the output buffer.
for channel in input_buffer.iter() {
for (sample_index, sample) in channel.iter().enumerate().take(max_len) {
buffer.set(0, sample_index, *sample + buffer.get(0, sample_index));
}
}
buffer
}
/// Runs the detector with two iterations on the drum-loop fixture, checks
/// the output length, then writes a plot and a WAV file of the result.
#[test]
fn test_transient_detector() {
use visualization::draw;
wisual_logger::init_from_env();
let output_path = relative_path!("./src/transient_detection/stft.png");
let input_path = relative_path!("./hiphop-drum-loop.mp3");
let transients_file_path = format!("{}.transients.wav", input_path);
let mut input = read_input_file(&input_path);
// Copy the input samples before `find_transients` overwrites the buffer.
let frames: Vec<f32> = input.channel(0).iter().cloned().collect();
// Absolute peak of the input, used further down to rescale the output.
let max_input = frames
.iter()
.map(|f| f.abs())
.max_by(|f1, f2| f1.partial_cmp(f2).unwrap_or(std::cmp::Ordering::Equal))
.unwrap();
let transients = find_transients(
IterativeTransientDetectionParams {
iteration_count: 2,
..IterativeTransientDetectionParams::default()
},
&mut input,
);
// The detector drops the first `fft_size` samples from its output.
assert_eq!(
frames.len() - IterativeTransientDetectionParams::default().fft_size,
transients.len()
);
draw(&output_path, &frames, &transients);
let settings = AudioProcessorSettings {
input_channels: 1,
output_channels: 1,
..AudioProcessorSettings::default()
};
let mut output_processor =
OutputAudioFileProcessor::from_path(settings, &transients_file_path);
output_processor.prepare(settings);
// Scale the detector output by the input's absolute peak before writing.
let transients: Vec<f32> = transients.iter().map(|f| f * max_input).collect();
let mut buffer = AudioBuffer::from_interleaved(1, &transients);
output_processor
.process(&mut buffer)
.expect("Failed to write transients to file");
}
}