use plotly::{
common::{Mode, Title},
Layout, Plot, Scatter,
};
use scirs2_core::random::prelude::*;
use scirs2_core::random::rngs::StdRng;
use scirs2_core::random::{Distribution, Normal};
use scirs2_core::Complex64;
use scirs2_fft::{
sparse_fft::{reconstruct_time_domain, SparseFFTAlgorithm, SparseFFTResult},
sparse_fft_cuda_kernels::execute_cuda_sublinear_sparse_fft,
sparse_fft_cuda_kernels_iterative::execute_cuda_iterative_sparse_fft,
sparse_fft_gpu::GPUBackend,
sparse_fft_gpu_cuda::{cuda_sparse_fft, get_cuda_devices},
sparse_fft_gpu_memory::is_cuda_available,
sparse_fft_gpu_memory::{init_global_memory_manager, AllocationStrategy},
};
use std::f64::consts::PI;
use std::time::Instant;
#[allow(dead_code)]
fn create_sparse_signal(n: usize, frequencies: &[(usize, f64)], noise_level: f64) -> Vec<f64> {
let mut rng = StdRng::seed_from_u64(42);
let normal = Normal::new(0.0, noise_level).expect("Operation failed");
let mut signal = vec![0.0; n];
for (i, sig) in signal.iter_mut().enumerate().take(n) {
let t = 2.0 * PI * (i as f64) / (n as f64);
for &(freq, amp) in frequencies {
*sig += amp * (freq as f64 * t).sin();
}
}
if noise_level > 0.0 {
for sample in &mut signal {
*sample += normal.sample(&mut rng);
}
}
signal
}
#[allow(dead_code)]
fn evaluate_accuracy(
result: &SparseFFTResult,
true_frequencies: &[(usize, f64)],
n: usize,
) -> (f64, f64, usize) {
let mut true_positives = 0;
let mut _false_positives = 0;
let mut found_indices = vec![false; true_frequencies.len()];
for &idx in &result.indices {
let mut found = false;
for (i, &(freq_, _)) in true_frequencies.iter().enumerate() {
let tolerance = std::cmp::max(1, n / 1000);
if (idx as i64 - freq_ as i64).abs() <= tolerance as i64 {
found = true;
found_indices[i] = true;
break;
}
}
if found {
true_positives += 1;
} else {
_false_positives += 1; }
}
let precision = true_positives as f64 / result.indices.len() as f64;
let recall = true_positives as f64 / true_frequencies.len() as f64;
(precision, recall, true_positives)
}
#[allow(dead_code)]
fn run_algorithm_comparison(n: usize, sparsity: usize, noise_level: f64) {
println!("\nRunning algorithm comparison:");
println!(" Signal size: {n}");
println!(" Expected sparsity: {sparsity}");
println!(" Noise level: {noise_level:.3}");
let frequencies = vec![
(30, 1.0), (70, 0.5), (150, 0.25), (350, 0.15), (700, 0.1), ];
println!("\nTrue frequency components:");
for (i, &(freq, amp)) in frequencies.iter().enumerate() {
println!(" {}. Frequency {} with amplitude {:.3}", i + 1, freq, amp);
}
let signal = create_sparse_signal(n, &frequencies, noise_level);
let algorithms = [
SparseFFTAlgorithm::Sublinear,
SparseFFTAlgorithm::CompressedSensing,
SparseFFTAlgorithm::Iterative,
];
let mut time_domain_plot = Plot::new();
let mut frequency_domain_plot = Plot::new();
let time_trace = Scatter::new(
(0..std::cmp::min(n, 500)).collect::<Vec<_>>(),
signal[0..std::cmp::min(n, 500)].to_vec(),
)
.mode(Mode::Lines)
.name("Time Domain Signal (first 500 samples)");
time_domain_plot.add_trace(time_trace);
println!("\nResults:");
println!(
"{:<20} {:<15} {:<15} {:<15} {:<15}",
"Algorithm", "CPU Time (ms)", "GPU Time (ms)", "Precision", "Recall"
);
println!("{:-<80}", "");
for &algorithm in &algorithms {
let cpu_start = Instant::now();
let cpu_result =
scirs2_fft::sparse_fft::sparse_fft(&signal, sparsity, Some(algorithm), None)
.expect("Operation failed");
let cpu_time = cpu_start.elapsed().as_millis();
let (cpu_precision, cpu_recall, cpu_true_positives) =
evaluate_accuracy(&cpu_result, &frequencies, n);
let gpu_time;
let gpu_result;
let gpu_accuracy;
if is_cuda_available() {
let _ = init_global_memory_manager(
GPUBackend::CUDA,
0, AllocationStrategy::CacheBySize,
1024 * 1024 * 1024, );
let gpu_start = Instant::now();
gpu_result = cuda_sparse_fft(
&signal,
sparsity,
0, Some(algorithm),
None,
)
.expect("Operation failed");
gpu_time = gpu_start.elapsed().as_millis();
gpu_accuracy = evaluate_accuracy(&gpu_result, &frequencies, n);
} else {
gpu_time = 0;
gpu_result = cpu_result.clone();
gpu_accuracy = (cpu_precision, cpu_recall, cpu_true_positives);
}
println!(
"{:<20} {:<15} {:<15} {:<15.3} {:<15.3}",
format!("{:?}", algorithm),
cpu_time,
if is_cuda_available() {
gpu_time.to_string()
} else {
"N/A".to_string()
},
gpu_accuracy.0,
gpu_accuracy.1
);
let mut indices: Vec<usize> = Vec::new();
let mut amplitudes: Vec<f64> = Vec::new();
let max_freq_to_plot = n / 2;
for (&idx, &val) in gpu_result.indices.iter().zip(gpu_result.values.iter()) {
if idx < max_freq_to_plot {
indices.push(idx);
amplitudes.push(val.norm());
}
}
let freq_trace = Scatter::new(indices, amplitudes)
.mode(Mode::Markers)
.name(format!("{algorithm:?} Algorithm"));
frequency_domain_plot.add_trace(freq_trace);
println!(
"\n {:?} Algorithm found {} significant frequencies:",
algorithm,
gpu_result.values.len()
);
let mut components: Vec<(usize, Complex64)> = gpu_result
.indices
.iter()
.zip(gpu_result.values.iter())
.map(|(&idx, &val)| (idx, val))
.collect();
components.sort_by(|a, b| {
b.1.norm()
.partial_cmp(&a.1.norm())
.expect("Operation failed")
});
for (i, (idx, val)) in components.iter().take(10).enumerate() {
println!(
" {}. Frequency {} with magnitude {:.6}",
i + 1,
idx,
val.norm()
);
}
}
time_domain_plot.set_layout(
Layout::new()
.title(Title::with_text("<b>Time Domain Signal</b>"))
.x_axis(plotly::layout::Axis::new().title(Title::with_text("Sample")))
.y_axis(plotly::layout::Axis::new().title(Title::with_text("Amplitude"))),
);
frequency_domain_plot.set_layout(
Layout::new()
.title(Title::with_text("<b>Frequency Domain Components</b>"))
.x_axis(plotly::layout::Axis::new().title(Title::with_text("Frequency Bin")))
.y_axis(plotly::layout::Axis::new().title(Title::with_text("Magnitude"))),
);
time_domain_plot.write_html("sparse_fft_time_domain.html");
frequency_domain_plot.write_html("sparse_fft_frequency_domain.html");
println!("\nPlots saved as sparse_fft_time_domain.html and sparse_fft_frequency_domain.html");
}
#[allow(dead_code)]
fn runsize_benchmark() {
println!("\nRunning size benchmark:");
if !is_cuda_available() {
println!("CUDA is not available. Skipping GPU benchmarks.");
return;
}
let _ = init_global_memory_manager(
GPUBackend::CUDA,
0, AllocationStrategy::CacheBySize,
1024 * 1024 * 1024, );
let sizes = [
1024,
4 * 1024,
16 * 1024,
64 * 1024,
256 * 1024,
1024 * 1024,
];
println!(
"\n{:<15} {:<20} {:<20} {:<20}",
"Signal Size", "Sublinear (ms)", "CompressedSensing (ms)", "Iterative (ms)"
);
println!("{:-<80}", "");
for &size in &sizes {
let frequencies = vec![(30, 1.0), (70, 0.5), (150, 0.25), (350, 0.15), (700, 0.1)];
let signal = create_sparse_signal(size, &frequencies, 0.01);
let mut times = Vec::new();
for algorithm in [
SparseFFTAlgorithm::Sublinear,
SparseFFTAlgorithm::CompressedSensing,
SparseFFTAlgorithm::Iterative,
] {
let start = Instant::now();
let _ = cuda_sparse_fft(
&signal,
6, 0, Some(algorithm),
None,
)
.expect("Operation failed");
let elapsed = start.elapsed().as_millis();
times.push(elapsed);
}
println!(
"{:<15} {:<20} {:<20} {:<20}",
size, times[0], times[1], times[2]
);
}
}
#[allow(dead_code)]
fn run_noise_benchmark() {
println!("\nRunning noise tolerance benchmark:");
if !is_cuda_available() {
println!("CUDA is not available. Skipping GPU benchmarks.");
return;
}
let _ = init_global_memory_manager(
GPUBackend::CUDA,
0, AllocationStrategy::CacheBySize,
1024 * 1024 * 1024, );
let noise_levels = [0.0, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0];
let n = 16 * 1024;
let frequencies = vec![(30, 1.0), (70, 0.5), (150, 0.25), (350, 0.15), (700, 0.1)];
println!(
"\n{:<15} {:<15} {:<15} {:<15}",
"Noise Level", "Sublinear", "CompressedSensing", "Iterative"
);
println!("{:-<65}", "");
for &noise in &noise_levels {
let signal = create_sparse_signal(n, &frequencies, noise);
let mut accuracies = Vec::new();
for algorithm in [
SparseFFTAlgorithm::Sublinear,
SparseFFTAlgorithm::CompressedSensing,
SparseFFTAlgorithm::Iterative,
] {
let result = cuda_sparse_fft(
&signal,
6, 0, Some(algorithm),
None,
)
.expect("Operation failed");
let (_, recall_, _) = evaluate_accuracy(&result, &frequencies, n);
accuracies.push(recall_);
}
println!(
"{:<15.3} {:<15.3} {:<15.3} {:<15.3}",
noise, accuracies[0], accuracies[1], accuracies[2]
);
}
}
#[allow(dead_code)]
fn run_iteration_comparison() {
println!("\nDemonstrating impact of iterations on Iterative Sparse FFT:");
if !is_cuda_available() {
println!("CUDA is not available. Skipping GPU benchmarks.");
return;
}
let n = 16 * 1024;
let sparsity = 10;
let frequencies = vec![
(30, 1.0), (70, 0.5), (150, 0.25), (200, 0.2), (270, 0.15), (350, 0.1), (420, 0.075), (500, 0.05), (600, 0.025), (700, 0.01), ];
println!("\nTrue frequency components (in descending magnitude):");
for (i, &(freq, amp)) in frequencies.iter().enumerate() {
println!(" {}. Frequency {} with amplitude {:.6}", i + 1, freq, amp);
}
let signal = create_sparse_signal(n, &frequencies, 0.005);
let iterations_to_test = [1, 2, 3, 5, 8, 10];
println!(
"\n{:<15} {:<15} {:<15} {:<20}",
"Iterations", "True Positives", "Recall", "Time (ms)"
);
println!("{:-<65}", "");
let mut plot = Plot::new();
for &iterations in &iterations_to_test {
let start = Instant::now();
let result = execute_cuda_iterative_sparse_fft(&signal, sparsity, iterations)
.expect("Operation failed");
let elapsed = start.elapsed().as_millis();
let (_, recall, true_positives) = evaluate_accuracy(&result, &frequencies, n);
println!("{iterations:<15} {true_positives:<15} {recall:<15.3} {elapsed:<20}");
let reconstructed = reconstruct_time_domain(&result, n).expect("Operation failed");
let mut error = 0.0;
for i in 0..n {
let sample_error = (signal[i] - reconstructed[i].re).abs();
error += sample_error * sample_error;
}
error = (error / n as f64).sqrt();
let trace = Scatter::new(
vec![iterations],
vec![recall * 100.0], )
.mode(Mode::Markers)
.marker(plotly::common::Marker::new().size(10 + 2 * iterations))
.name(format!("{iterations} iterations (RMSE: {error:.5})"));
plot.add_trace(trace);
if iterations == 1 || iterations == *iterations_to_test.last().expect("Operation failed") {
println!("\n With {iterations} iterations, found components:");
let mut components: Vec<(usize, Complex64)> = result
.indices
.iter()
.zip(result.values.iter())
.map(|(&idx, &val)| (idx, val))
.collect();
components.sort_by(|a, b| {
b.1.norm()
.partial_cmp(&a.1.norm())
.expect("Operation failed")
});
for (i, (idx, val)) in components.iter().take(10).enumerate() {
let mut is_true = false;
for &(freq_, _) in &frequencies {
let tolerance = std::cmp::max(1, n / 1000);
if (*idx as i64 - freq_ as i64).abs() <= tolerance as i64 {
is_true = true;
break;
}
}
println!(
" {}. Frequency {} with magnitude {:.6} {}",
i + 1,
idx,
val.norm(),
if is_true {
"(TRUE)"
} else {
"(False positive)"
}
);
}
}
}
plot.set_layout(
Layout::new()
.title(Title::with_text(
"<b>Effect of Iterations on Iterative Sparse FFT Accuracy</b>",
))
.x_axis(plotly::layout::Axis::new().title(Title::with_text("Number of Iterations")))
.y_axis(
plotly::layout::Axis::new()
.title(Title::with_text("Recall (%)"))
.range(vec![0.0, 100.0]),
),
);
plot.write_html("iterative_sparse_fft_iterations.html");
println!("\nPlot saved as iterative_sparse_fft_iterations.html");
println!("\nComparing with Sublinear algorithm (single pass):");
let start = Instant::now();
let sublinear_result =
execute_cuda_sublinear_sparse_fft(&signal, sparsity, SparseFFTAlgorithm::Sublinear)
.expect("Operation failed");
let elapsed = start.elapsed().as_millis();
let (_, recall, true_positives) = evaluate_accuracy(&sublinear_result, &frequencies, n);
println!(
"Sublinear: Found {}/{} components (Recall: {:.3}) in {} ms",
true_positives,
frequencies.len(),
recall,
elapsed
);
let reconstructed = reconstruct_time_domain(&sublinear_result, n).expect("Operation failed");
let mut error = 0.0;
for i in 0..n {
let sample_error = (signal[i] - reconstructed[i].re).abs();
error += sample_error * sample_error;
}
error = (error / n as f64).sqrt();
println!("Sublinear reconstruction RMSE: {error:.6}");
println!("\n Sublinear algorithm found components:");
let mut components: Vec<(usize, Complex64)> = sublinear_result
.indices
.iter()
.zip(sublinear_result.values.iter())
.map(|(&idx, &val)| (idx, val))
.collect();
components.sort_by(|a, b| {
b.1.norm()
.partial_cmp(&a.1.norm())
.expect("Operation failed")
});
for (i, (idx, val)) in components.iter().take(10).enumerate() {
let mut is_true = false;
for &(freq_, _) in &frequencies {
let tolerance = std::cmp::max(1, n / 1000);
if (*idx as i64 - freq_ as i64).abs() <= tolerance as i64 {
is_true = true;
break;
}
}
println!(
" {}. Frequency {} with magnitude {:.6} {}",
i + 1,
idx,
val.norm(),
if is_true {
"(TRUE)"
} else {
"(False positive)"
}
);
}
}
#[allow(dead_code)]
fn main() {
println!("GPU-Accelerated Iterative Sparse FFT Example");
println!("============================================");
if is_cuda_available() {
let devices = get_cuda_devices().expect("Operation failed");
println!("\nCUDA is available with {} device(s):", devices.len());
for (idx, device) in devices.iter().enumerate() {
println!(" - Device {} (initialized: {})", idx, device.initialized);
}
} else {
println!("\nCUDA is not available. This example will use CPU implementations.");
}
run_iteration_comparison();
run_algorithm_comparison(16 * 1024, 6, 0.05);
runsize_benchmark();
run_noise_benchmark();
println!("\nExample completed successfully!");
}