use scirs2_core::ndarray::Array2;
use scirs2_core::Complex64;
use scirs2_fft::fft::{fft, fft2};
use scirs2_fft::memory_efficient::{fft2_efficient, fft_inplace, FftMode};
use scirs2_fft::PlanCache;
use std::f64::consts::PI;
use std::time::{Duration, Instant};
/// Program entry point: prints the banner and runs the 1D and 2D benchmark suites.
///
/// The transform sizes and the number of timed iterations are fixed here.
#[allow(dead_code)]
fn main() {
    // Number of timed repetitions per measured variant.
    const ITERATIONS: usize = 10;

    println!("Memory Usage Benchmarking for FFT Operations");
    println!("=============================================\n");

    // Signal lengths for the 1D suite and (rows, cols) shapes for the 2D suite.
    let lengths_1d: [usize; 4] = [1024, 4096, 16384, 65536];
    let shapes_2d: [(usize, usize); 4] = [(32, 32), (64, 64), (128, 128), (256, 256)];

    benchmark_1d_ffts(&lengths_1d, ITERATIONS);
    benchmark_2d_ffts(&shapes_2d, ITERATIONS);
}
/// Times 1D FFT variants for every length in `sizes` and prints a results table.
///
/// For each length the function builds a test signal, performs three untimed
/// warm-up transforms, and then measures three series of `iterations` calls:
///
/// * "Standard"  - `fft(&signal, None)`
/// * "Efficient" - `optimized_fft(&signal)`
/// * "Planned"   - `fft(&signal, None)` again, after the local `_plan_cache`
///   has been replaced by a fresh `PlanCache` and one separately timed
///   "first" call has been made.
///
/// Reported values are mean milliseconds per call. "Best Speedup" is the
/// standard mean divided by the smallest of the three means.
///
/// # Panics
///
/// Panics with "Operation failed" if any transform returns an error.
#[allow(dead_code)]
fn benchmark_1d_ffts(sizes: &[usize], iterations: usize) {
    println!("\n1D FFT Memory Usage Benchmarking");
    println!("--------------------------------");
    println!(
        "{:>10} | {:>15} | {:>15} | {:>15} | {:>15}",
        "Size", "Standard (ms)", "Efficient (ms)", "Planned (ms)", "Best Speedup"
    );
    println!("{:-<75}", "");

    // Converts an accumulated duration into mean milliseconds per iteration.
    let mean_ms = |total: Duration| total.as_secs_f64() * 1000.0 / iterations as f64;

    // NOTE(review): this value is never passed to `fft`; whether creating or
    // replacing it influences any cache used inside `fft` cannot be told from
    // this file.
    let mut _plan_cache = PlanCache::new();

    for &len in sizes {
        let samples: Vec<Complex64> = create_test_signal(len)
            .into_iter()
            .map(|re| Complex64::new(re, 0.0))
            .collect();

        // Untimed warm-up calls.
        let _ = fft(&samples, None).expect("Operation failed");
        let _ = optimized_fft(&samples).expect("Operation failed");
        let _ = fft(&samples, None).expect("Operation failed");

        let standard_total: Duration = (0..iterations)
            .map(|_| {
                let tick = Instant::now();
                let _ = fft(&samples, None).expect("Operation failed");
                tick.elapsed()
            })
            .sum();

        let efficient_total: Duration = (0..iterations)
            .map(|_| {
                let tick = Instant::now();
                let _ = optimized_fft(&samples).expect("Operation failed");
                tick.elapsed()
            })
            .sum();

        // Start the "planned" series from a freshly constructed cache value
        // and time the first call on its own.
        _plan_cache = PlanCache::new();
        let first_tick = Instant::now();
        let _ = fft(&samples, None).expect("Operation failed");
        let first_call_ms = first_tick.elapsed().as_secs_f64() * 1000.0;

        let planned_total: Duration = (0..iterations)
            .map(|_| {
                let tick = Instant::now();
                let _ = fft(&samples, None).expect("Operation failed");
                tick.elapsed()
            })
            .sum();

        let avg_standard = mean_ms(standard_total);
        let avg_efficient = mean_ms(efficient_total);
        let avg_planned = mean_ms(planned_total);

        // Guard the ratios so a zero (or NaN) denominator prints as 0.0.
        let fastest = avg_standard.min(avg_efficient).min(avg_planned);
        let best_speedup = if fastest > 0.0 {
            avg_standard / fastest
        } else {
            0.0
        };
        let cache_speedup = if avg_planned > 0.0 {
            first_call_ms / avg_planned
        } else {
            0.0
        };

        println!(
            "{:>10} | {:>15.2} | {:>15.2} | {:>15.2} | {:>15.2}x",
            len, avg_standard, avg_efficient, avg_planned, best_speedup
        );
        println!(
            " Plan creation: {:.2} ms, Cached: {:.2} ms, Cache speedup: {:.2}x",
            first_call_ms, avg_planned, cache_speedup
        );
    }

    println!("\nPlan cache used internal caching mechanism");
}
/// Times 2D FFT variants for every `(rows, cols)` shape in `sizes`, prints a
/// timing table, and then prints an estimated memory-usage table.
///
/// For each shape the function builds a test array, performs three untimed
/// warm-up transforms, and then measures three series of `iterations` calls:
///
/// * "Standard"  - `fft2(&array, None, None, None)`
/// * "Efficient" - `optimized_fft2(&array, None)`
/// * "Planned"   - `fft2(&array, None, None, None)` again, after the local
///   `_plan_cache` has been replaced by a fresh `PlanCache` and one separately
///   timed "first" call has been made.
///
/// Reported timings are mean milliseconds per call. The memory table is
/// computed from the element count and fixed multipliers (3.5 and 2.0); it is
/// an estimate built into the code, not a measurement.
///
/// # Panics
///
/// Panics with "Operation failed" if any transform returns an error.
#[allow(dead_code)]
fn benchmark_2d_ffts(sizes: &[(usize, usize)], iterations: usize) {
    println!("\n2D FFT Memory Usage Benchmarking");
    println!("--------------------------------");
    println!(
        "{:>15} | {:>15} | {:>15} | {:>15} | {:>15}",
        "Size", "Standard (ms)", "Efficient (ms)", "Planned (ms)", "Best Speedup"
    );
    println!("{:-<80}", "");

    // Converts an accumulated duration into mean milliseconds per iteration.
    let mean_ms = |total: Duration| total.as_secs_f64() * 1000.0 / iterations as f64;

    // NOTE(review): this value is never passed to `fft2`; whether creating or
    // replacing it influences any cache used inside `fft2` cannot be told from
    // this file.
    let mut _plan_cache = PlanCache::new();

    for &(rows, cols) in sizes {
        let grid = create_test_array(rows, cols);

        // Untimed warm-up calls.
        let _ = fft2(&grid, None, None, None).expect("Operation failed");
        let _ = optimized_fft2(&grid, None).expect("Operation failed");
        let _ = fft2(&grid, None, None, None).expect("Operation failed");

        let standard_total: Duration = (0..iterations)
            .map(|_| {
                let tick = Instant::now();
                let _ = fft2(&grid, None, None, None).expect("Operation failed");
                tick.elapsed()
            })
            .sum();

        let efficient_total: Duration = (0..iterations)
            .map(|_| {
                let tick = Instant::now();
                let _ = optimized_fft2(&grid, None).expect("Operation failed");
                tick.elapsed()
            })
            .sum();

        // Start the "planned" series from a freshly constructed cache value
        // and time the first call on its own.
        _plan_cache = PlanCache::new();
        let first_tick = Instant::now();
        let _ = fft2(&grid, None, None, None).expect("Operation failed");
        let first_call_ms = first_tick.elapsed().as_secs_f64() * 1000.0;

        let planned_total: Duration = (0..iterations)
            .map(|_| {
                let tick = Instant::now();
                let _ = fft2(&grid, None, None, None).expect("Operation failed");
                tick.elapsed()
            })
            .sum();

        let avg_standard = mean_ms(standard_total);
        let avg_efficient = mean_ms(efficient_total);
        let avg_planned = mean_ms(planned_total);

        // Guard the ratios so a zero (or NaN) denominator prints as 0.0.
        let fastest = avg_standard.min(avg_efficient).min(avg_planned);
        let best_speedup = if fastest > 0.0 {
            avg_standard / fastest
        } else {
            0.0
        };
        let cache_speedup = if avg_planned > 0.0 {
            first_call_ms / avg_planned
        } else {
            0.0
        };

        println!(
            "{:>6}x{:<8} | {:>15.2} | {:>15.2} | {:>15.2} | {:>15.2}x",
            rows, cols, avg_standard, avg_efficient, avg_planned, best_speedup
        );
        println!(
            " Plan creation: {:.2} ms, Cached: {:.2} ms, Cache speedup: {:.2}x",
            first_call_ms, avg_planned, cache_speedup
        );
    }

    println!("\nPlan cache used internal caching mechanism");
    println!("\nEstimated memory usage comparison:");
    println!(
        "{:>15} | {:>15} | {:>15} | {:>15}",
        "Size", "Standard (MB)", "Efficient (MB)", "Reduction (%)"
    );
    println!("{:-<70}", "");

    const BYTES_PER_MIB: f64 = 1024.0 * 1024.0;
    let element_bytes = std::mem::size_of::<Complex64>() as f64;

    for &(rows, cols) in sizes {
        // Bytes needed for one full copy of the array.
        let footprint = (rows * cols) as f64 * element_bytes;
        // Hard-coded estimate factors: 3.5 copies (standard) vs 2.0 copies (efficient).
        let std_mem = footprint * 3.5 / BYTES_PER_MIB;
        let eff_mem = footprint * 2.0 / BYTES_PER_MIB;
        let reduction = 100.0 * (std_mem - eff_mem) / std_mem;
        println!(
            "{:>6}x{:<8} | {:>15.2} | {:>15.2} | {:>15.2}%",
            rows, cols, std_mem, eff_mem, reduction
        );
    }
}
/// Builds a real-valued test signal of `size` samples.
///
/// Sample `i` is evaluated at `x = i / size` as
/// `sin(2*PI*4*x) + 0.5 * sin(2*PI*8*x)`, i.e. the sum of two sinusoids with
/// 4 and 8 cycles over the signal length. Returns an empty vector when
/// `size` is zero.
#[allow(dead_code)]
fn create_test_signal(size: usize) -> Vec<f64> {
    let span = size as f64;
    (0..size)
        .map(|idx| {
            let x = idx as f64 / span;
            (2.0 * PI * 4.0 * x).sin() + 0.5 * (2.0 * PI * 8.0 * x).sin()
        })
        .collect()
}
/// Builds a `rows` x `cols` complex test array with zero imaginary parts.
///
/// Element `[i, j]` has real part `sin(2*PI*4*x) * cos(2*PI*4*y)` where
/// `x = i / rows` and `y = j / cols`.
#[allow(dead_code)]
fn create_test_array(rows: usize, cols: usize) -> Array2<Complex64> {
    let row_span = rows as f64;
    let col_span = cols as f64;
    Array2::from_shape_fn((rows, cols), |(i, j)| {
        let x = i as f64 / row_span;
        let y = j as f64 / col_span;
        let re = (2.0 * PI * 4.0 * x).sin() * (2.0 * PI * 4.0 * y).cos();
        Complex64::new(re, 0.0)
    })
}
/// Runs a forward 1D FFT through `fft_inplace` and returns the output buffer.
///
/// `input` itself is left untouched: it is copied into a mutable working
/// buffer, and a zero-filled buffer of the same length is passed as the
/// output argument and returned on success.
///
/// # Errors
///
/// Propagates any error returned by `fft_inplace`.
#[allow(dead_code)]
fn optimized_fft(input: &[Complex64]) -> scirs2_fft::error::FFTResult<Vec<Complex64>> {
    let zero = Complex64::new(0.0, 0.0);
    let mut spectrum = vec![zero; input.len()];
    let mut working_copy = input.to_vec();

    // NOTE(review): the final `true` is presumably a normalization flag -
    // confirm against the `fft_inplace` documentation.
    fft_inplace(&mut working_copy, &mut spectrum, FftMode::Forward, true)?;
    Ok(spectrum)
}
/// Runs a forward 2D FFT of `input` through `fft2_efficient`.
///
/// `shape` is forwarded unchanged to `fft2_efficient`. Previously the
/// argument was accepted but discarded (`None` was always passed on), so a
/// caller-supplied shape had no effect. Passing `None` behaves exactly as
/// before.
///
/// # Errors
///
/// Propagates any error returned by `fft2_efficient`.
#[allow(dead_code)]
fn optimized_fft2(
    input: &Array2<Complex64>,
    shape: Option<(usize, usize)>,
) -> scirs2_fft::error::FFTResult<Array2<Complex64>> {
    // NOTE(review): the final `true` is presumably a normalization flag -
    // confirm against the `fft2_efficient` documentation.
    fft2_efficient(&input.view(), shape, FftMode::Forward, true)
}
/// Manual allocation bookkeeping backed by global atomic counters.
///
/// Nothing inside this module observes allocations by itself; callers report
/// events through [`record_allocation`](memory_tracking::record_allocation)
/// and [`record_deallocation`](memory_tracking::record_deallocation) and read
/// a snapshot with [`get_stats`](memory_tracking::get_stats).
mod memory_tracking {
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Number of reported allocations that have not been reported as freed.
    #[allow(dead_code)]
    static ACTIVE_ALLOCATIONS: AtomicUsize = AtomicUsize::new(0);
    /// Bytes currently outstanding: grows on allocation, shrinks on deallocation.
    #[allow(dead_code)]
    static TOTAL_ALLOCATED: AtomicUsize = AtomicUsize::new(0);
    /// Largest value `TOTAL_ALLOCATED` has reached since the last reset.
    #[allow(dead_code)]
    static PEAK_ALLOCATED: AtomicUsize = AtomicUsize::new(0);

    /// Resets all three counters to zero.
    #[allow(dead_code)]
    pub fn reset_counters() {
        ACTIVE_ALLOCATIONS.store(0, Ordering::SeqCst);
        TOTAL_ALLOCATED.store(0, Ordering::SeqCst);
        PEAK_ALLOCATED.store(0, Ordering::SeqCst);
    }

    /// Returns a snapshot of the current counter values.
    ///
    /// Each counter is loaded separately, so under concurrent updates the
    /// three fields are not guaranteed to describe the same instant.
    #[allow(dead_code)]
    pub fn get_stats() -> MemoryStats {
        MemoryStats {
            active_allocations: ACTIVE_ALLOCATIONS.load(Ordering::SeqCst),
            total_allocated: TOTAL_ALLOCATED.load(Ordering::SeqCst),
            peak_allocated: PEAK_ALLOCATED.load(Ordering::SeqCst),
        }
    }

    /// Records one allocation of `size` bytes and raises the peak if needed.
    #[allow(dead_code)]
    pub fn record_allocation(size: usize) {
        ACTIVE_ALLOCATIONS.fetch_add(1, Ordering::SeqCst);
        // Derive the new outstanding total from the value returned by the add
        // itself, so the high-water mark produced by this call is never missed
        // even if another thread deallocates immediately afterwards.
        // `wrapping_add` mirrors the wrapping behaviour of `fetch_add`.
        let outstanding = TOTAL_ALLOCATED
            .fetch_add(size, Ordering::SeqCst)
            .wrapping_add(size);
        PEAK_ALLOCATED.fetch_max(outstanding, Ordering::SeqCst);
    }

    /// Records one deallocation of `size` bytes.
    ///
    /// Counters saturate at zero: reporting more deallocations (or more
    /// bytes) than were previously recorded clamps to `0` instead of wrapping
    /// around to a huge value.
    #[allow(dead_code)]
    pub fn record_deallocation(size: usize) {
        saturating_decrement(&ACTIVE_ALLOCATIONS, 1);
        saturating_decrement(&TOTAL_ALLOCATED, size);
    }

    /// Atomically subtracts `amount` from `counter`, clamping at zero.
    #[allow(dead_code)]
    fn saturating_decrement(counter: &AtomicUsize, amount: usize) {
        // The closure always returns `Some`, so the update cannot fail.
        let _ = counter.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |value| {
            Some(value.saturating_sub(amount))
        });
    }

    /// Snapshot of the tracking counters as returned by [`get_stats`].
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    #[allow(dead_code)]
    pub struct MemoryStats {
        /// Allocations recorded and not yet released.
        pub active_allocations: usize,
        /// Bytes outstanding at the time of the snapshot.
        pub total_allocated: usize,
        /// Highest outstanding byte count seen since the last reset.
        pub peak_allocated: usize,
    }

    impl MemoryStats {
        /// Formats a byte count using binary (1024-based) steps.
        ///
        /// Values below 1024 are printed as whole bytes (`"512 B"`); larger
        /// values are printed with two decimals and a `KB`, `MB` or `GB`
        /// suffix.
        #[allow(dead_code)]
        pub fn formatsize(size: usize) -> String {
            const KIB: usize = 1024;
            const MIB: usize = KIB * 1024;
            const GIB: usize = MIB * 1024;

            match size {
                s if s < KIB => format!("{} B", s),
                s if s < MIB => format!("{:.2} KB", s as f64 / KIB as f64),
                s if s < GIB => format!("{:.2} MB", s as f64 / MIB as f64),
                s => format!("{:.2} GB", s as f64 / GIB as f64),
            }
        }

        /// Returns `peak_allocated` formatted by [`MemoryStats::formatsize`].
        #[allow(dead_code)]
        pub fn peak_memory_str(&self) -> String {
            Self::formatsize(self.peak_allocated)
        }
    }
}