use scirs2_core::ndarray::Array3;
use scirs2_fft::{fftn, fftn_memory_efficient, fftn_optimized};
use std::time::Instant;
/// Demonstrates and times the N-dimensional FFT entry points imported from
/// `scirs2_fft`: `fftn`, `fftn_optimized` and `fftn_memory_efficient`.
///
/// The program runs five sections in order and prints its findings to stdout:
/// 1. standard vs. optimized timing for several 3-D shapes,
/// 2. the memory-efficient variant under different limits,
/// 3. the optimized variant with different axis orders,
/// 4. a single timed run on a cubic array,
/// 5. a numerical comparison of the standard and optimized results.
///
/// # Panics
///
/// Panics with "Operation failed" if `fftn` or `fftn_optimized` returns an
/// error. Errors from `fftn_memory_efficient` are printed instead.
fn main() {
    println!("Optimized N-dimensional FFT Example");
    println!("===================================");
    println!();

    // --- Section 1: standard vs. optimized timing -------------------------
    let sizes = [(64, 64, 64), (128, 128, 32), (256, 64, 64)];
    for size in sizes {
        println!("Testing size: {size:?}");
        let array = Array3::from_shape_fn(size, |(i, j, k)| {
            ((i as f64).sin() + (j as f64).cos() + (k as f64).tan()) / 3.0
        });

        // Build the dynamic-dimension copy *before* starting the clock so the
        // copy is not attributed to the standard FFT.
        let array_dyn = array.clone().into_dyn();

        let start = Instant::now();
        let _result_std =
            fftn(&array_dyn, None, None, None, None, None).expect("Operation failed");
        let time_standard = start.elapsed();

        let start = Instant::now();
        let _result_opt =
            fftn_optimized(&array.view(), None, None).expect("Operation failed");
        let time_optimized = start.elapsed();

        let speedup = time_standard.as_secs_f64() / time_optimized.as_secs_f64();
        println!(" Standard FFT: {time_standard:?}");
        println!(" Optimized FFT: {time_optimized:?}");
        println!(" Speedup: {speedup:.2}x");
        println!();
    }

    // --- Section 2: memory-efficient variant ------------------------------
    println!("Memory-efficient FFT for large arrays:");
    println!("-------------------------------------");
    let largesize = (512, 512, 16);
    let large_array = Array3::from_shape_fn(largesize, |(i, j, k)| (i + j + k) as f64);
    // Size of the real-valued input buffer only, expressed in GiB.
    let memory_gb =
        (large_array.len() * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0 * 1024.0);
    println!("Array size: {largesize:?}, Memory: {memory_gb:.2} GB");

    let memory_limits = [0.5, 1.0, 2.0];
    for limit in memory_limits {
        println!("\nMemory limit: {limit:.1} GB");
        let start = Instant::now();
        // NOTE(review): the third argument is presumably a limit in GB, as the
        // message above suggests; confirm against the scirs2_fft docs.
        let result = fftn_memory_efficient(&large_array.view(), None, limit);
        let duration = start.elapsed();
        match result {
            Ok(_) => println!(" Success! Time: {duration:?}"),
            Err(e) => println!(" Error: {e}"),
        }
    }

    // --- Section 3: axis ordering -----------------------------------------
    println!("\n\nAxis ordering optimization:");
    println!("--------------------------");
    let asymmetric = Array3::from_shape_fn((256, 32, 128), |(i, j, k)| (i * j * k) as f64);
    let axis_orders = [vec![0, 1, 2], vec![1, 2, 0], vec![2, 0, 1]];
    for axes in axis_orders {
        println!("\nAxis order: {axes:?}");
        let start = Instant::now();
        let _result =
            fftn_optimized(&asymmetric.view(), None, Some(axes)).expect("Operation failed");
        let duration = start.elapsed();
        println!(" Time: {duration:?}");
    }

    // --- Section 4: single timed run on a cubic array ---------------------
    println!("\n\nCache-friendly chunking:");
    println!("-----------------------");
    let chunk_test = Array3::from_shape_fn((128, 128, 128), |(i, j, k)| (i ^ j ^ k) as f64);
    let start = Instant::now();
    let _result = fftn_optimized(&chunk_test.view(), None, None).expect("Operation failed");
    let duration = start.elapsed();
    println!("FFT with optimized chunking: {duration:?}");
    println!("\nChunk size analysis:");
    let total_elements = chunk_test.len();
    println!("Total elements: {total_elements}");
    println!("Optimal chunk size: determined automatically based on cache size");

    // --- Section 5: correctness check -------------------------------------
    println!("\n\nVerifying correctness:");
    println!("---------------------");
    let test_array = Array3::from_shape_fn((32, 32, 32), |(i, j, k)| {
        (i as f64 * 0.1) + (j as f64 * 0.01) + (k as f64 * 0.001)
    });
    // Run the view-based variant first so the array can then be moved into its
    // dynamic-dimension form without an extra copy.
    let result_optimized =
        fftn_optimized(&test_array.view(), None, None).expect("Operation failed");
    let result_standard = fftn(&test_array.into_dyn(), None, None, None, None, None)
        .expect("Operation failed");

    // `zip` stops at the shorter input, so a size mismatch must be detected
    // separately or it would go unnoticed.
    let same_len = result_standard.len() == result_optimized.len();
    // `f64::max` discards NaN operands, which would let a NaN difference pass
    // as a perfect match; propagate NaN explicitly instead.
    let max_diff = result_standard
        .iter()
        .zip(result_optimized.iter())
        .map(|(a, b)| (a.re - b.re).abs() + (a.im - b.im).abs())
        .fold(0.0_f64, |worst, diff| {
            if worst.is_nan() || diff.is_nan() {
                f64::NAN
            } else {
                worst.max(diff)
            }
        });
    println!("Maximum difference between standard and optimized: {max_diff:.2e}");
    // A NaN `max_diff` compares false here, so it is reported as a mismatch.
    println!("Results match: {}", same_len && max_diff < 1e-10);
}