oxits 0.1.0 - Docs.rs

//! Focused image module profiling.
//! cargo run --release --example image_profile [n_timestamps]

use rand::prelude::*;
use rand_distr::Normal;
use std::time::Instant;

use oxits::image::gaf::{Gaf, GafConfig};
use oxits::image::mtf::{Mtf, MtfConfig};
use oxits::image::recurrence_plot::{RecurrencePlot, RecurrencePlotConfig};
use oxits::GafMethod;

fn generate_data(seed: u64, n_samples: usize, n_timestamps: usize) -> Vec<Vec<f64>> {
    let mut rng = StdRng::seed_from_u64(seed);
    let normal = Normal::new(0.0, 1.0).unwrap();
    (0..n_samples)
        .map(|_| (0..n_timestamps).map(|_| normal.sample(&mut rng)).collect())
        .collect()
}

fn bench<F: FnMut()>(label: &str, mut f: F, iters: usize) {
    for _ in 0..5 {
        f();
    }
    let mut times: Vec<f64> = Vec::with_capacity(iters);
    for _ in 0..iters {
        let start = Instant::now();
        f();
        times.push(start.elapsed().as_secs_f64());
    }
    times.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let min = times[0] * 1000.0;
    let median = times[iters / 2] * 1000.0;
    let p90 = times[(iters as f64 * 0.9) as usize] * 1000.0;
    println!("  {label:40}  min={min:8.3}  med={median:8.3}  p90={p90:8.3} ms");
}

fn main() {
    let n = std::env::args()
        .nth(1)
        .and_then(|s| s.parse().ok())
        .unwrap_or(100);
    let n_samples = 50;
    let x = generate_data(42, n_samples, n);
    let iters = 101;

    println!("=== Image Profiling ({n_samples} samples x {n} timestamps, {iters} iters) ===\n");

    // End-to-end
    let config = GafConfig::new();
    bench(
        "GASF (end-to-end)",
        || {
            Gaf::transform(&config, &x);
        },
        iters,
    );
    let config = GafConfig {
        method: GafMethod::Difference,
        ..GafConfig::new()
    };
    bench(
        "GADF (end-to-end)",
        || {
            Gaf::transform(&config, &x);
        },
        iters,
    );
    let config = MtfConfig::new();
    bench(
        "MTF (end-to-end)",
        || {
            Mtf::transform(&config, &x);
        },
        iters,
    );
    let config = RecurrencePlotConfig::new();
    bench(
        "RP (end-to-end)",
        || {
            RecurrencePlot::transform(&config, &x);
        },
        iters,
    );

    // Component breakdown
    println!("\n--- GASF component breakdown ---");

    // 1. scale + phi (allocation of cos_phi + sin_phi)
    let range_min = -1.0_f64;
    let range_max = 1.0_f64;
    bench(
        "1. scale+phi (50 x 2 Vec alloc)",
        || {
            for sample in &x {
                let x_min = sample.iter().copied().fold(f64::INFINITY, f64::min);
                let x_max = sample.iter().copied().fold(f64::NEG_INFINITY, f64::max);
                let scale = (range_max - range_min) / (x_max - x_min);
                let (cp, sp): (Vec<f64>, Vec<f64>) = sample
                    .iter()
                    .map(|&v| {
                        let c = ((v - x_min) * scale + range_min).clamp(-1.0, 1.0);
                        (c, (1.0 - c * c).max(0.0).sqrt())
                    })
                    .unzip();
                std::hint::black_box((&cp, &sp));
            }
        },
        iters,
    );

    // Pre-compute phi for next benchmarks
    let phis: Vec<(Vec<f64>, Vec<f64>)> = x
        .iter()
        .map(|sample| {
            let x_min = sample.iter().copied().fold(f64::INFINITY, f64::min);
            let x_max = sample.iter().copied().fold(f64::NEG_INFINITY, f64::max);
            let scale = (range_max - range_min) / (x_max - x_min);
            sample
                .iter()
                .map(|&v| {
                    let c = ((v - x_min) * scale + range_min).clamp(-1.0, 1.0);
                    (c, (1.0 - c * c).max(0.0).sqrt())
                })
                .unzip()
        })
        .collect();

    // 2. outer product with collect (includes row alloc)
    bench(
        "2. outer product + collect",
        || {
            for (cp, sp) in &phis {
                let _img: Vec<Vec<f64>> = (0..cp.len())
                    .map(|i| {
                        let ci = cp[i];
                        let si = sp[i];
                        cp.iter()
                            .zip(sp.iter())
                            .map(|(&cj, &sj)| ci * cj - si * sj)
                            .collect()
                    })
                    .collect();
                std::hint::black_box(&_img);
            }
        },
        iters,
    );

    // 3. outer product into pre-allocated buffer (no alloc in hot loop)
    let mut images: Vec<Vec<Vec<f64>>> = (0..n_samples)
        .map(|_| (0..n).map(|_| vec![0.0_f64; n]).collect())
        .collect();

    bench(
        "3. outer product pre-alloc fill",
        || {
            for (idx, (cp, sp)) in phis.iter().enumerate() {
                for i in 0..n {
                    let ci = cp[i];
                    let si = sp[i];
                    let row = &mut images[idx][i];
                    for (out, (&cj, &sj)) in row.iter_mut().zip(cp.iter().zip(sp.iter())) {
                        *out = ci * cj - si * sj;
                    }
                }
            }
            std::hint::black_box(&images);
        },
        iters,
    );

    // 4. pure alloc cost (no compute)
    bench(
        &format!("4. alloc only ({n_samples}x {n}x{n})"),
        || {
            for _ in 0..n_samples {
                let _img: Vec<Vec<f64>> = (0..n).map(|_| vec![0.0_f64; n]).collect();
                std::hint::black_box(&_img);
            }
        },
        iters,
    );

    // 5. rayon overhead (trivial per-item work)
    #[cfg(feature = "parallel")]
    bench(
        "5. rayon overhead (50 no-ops)",
        || {
            use rayon::prelude::*;
            let _: Vec<usize> = (0..n_samples)
                .into_par_iter()
                .map(std::hint::black_box)
                .collect();
        },
        iters,
    );

    // 6. Full pipeline (sequential, accumulating)
    bench(
        "6. full pipeline seq accumulate",
        || {
            let _result: Vec<Vec<Vec<f64>>> = phis
                .iter()
                .map(|(cp, sp)| {
                    let n = cp.len();
                    (0..n)
                        .map(|i| {
                            let ci = cp[i];
                            let si = sp[i];
                            cp.iter()
                                .zip(sp.iter())
                                .map(|(&cj, &sj)| ci * cj - si * sj)
                                .collect()
                        })
                        .collect()
                })
                .collect();
            std::hint::black_box(&_result);
        },
        iters,
    );

    // 7. Same but with intermediate phi allocation (matches Gaf::transform)
    bench(
        "7. pipeline + phi alloc (=Gaf)",
        || {
            let _result: Vec<Vec<Vec<f64>>> = x
                .iter()
                .map(|sample| {
                    let x_min = sample.iter().copied().fold(f64::INFINITY, f64::min);
                    let x_max = sample.iter().copied().fold(f64::NEG_INFINITY, f64::max);
                    let scale = (range_max - range_min) / (x_max - x_min);
                    let (cp, sp): (Vec<f64>, Vec<f64>) = sample
                        .iter()
                        .map(|&v| {
                            let c = ((v - x_min) * scale + range_min).clamp(-1.0, 1.0);
                            (c, (1.0 - c * c).max(0.0).sqrt())
                        })
                        .unzip();
                    let n = cp.len();
                    (0..n)
                        .map(|i| {
                            let ci = cp[i];
                            let si = sp[i];
                            cp.iter()
                                .zip(sp.iter())
                                .map(|(&cj, &sj)| ci * cj - si * sj)
                                .collect()
                        })
                        .collect::<Vec<Vec<f64>>>()
                })
                .collect();
            std::hint::black_box(&_result);
        },
        iters,
    );

    // --- Flat buffer strategies ---
    println!("\n--- Flat buffer strategies ---");

    // 8. Single flat alloc + sequential fill + sequential convert
    bench(
        "8. flat seq fill + seq convert",
        || {
            let mut flat = vec![0.0_f64; n_samples * n * n];
            for (idx, (cp, sp)) in phis.iter().enumerate() {
                let base = idx * n * n;
                for i in 0..n {
                    let ci = cp[i];
                    let si = sp[i];
                    let row = &mut flat[base + i * n..base + (i + 1) * n];
                    for (out, (&cj, &sj)) in row.iter_mut().zip(cp.iter().zip(sp.iter())) {
                        *out = ci * cj - si * sj;
                    }
                }
            }
            let _result: Vec<Vec<Vec<f64>>> = flat
                .chunks_exact(n * n)
                .map(|img| img.chunks_exact(n).map(|row| row.to_vec()).collect())
                .collect();
            std::hint::black_box(&_result);
        },
        iters,
    );

    // 9. Single flat alloc + rayon fill + rayon convert
    #[cfg(feature = "parallel")]
    bench(
        "9. flat par fill + par convert",
        || {
            use rayon::prelude::*;
            let mut flat = vec![0.0_f64; n_samples * n * n];
            flat.par_chunks_mut(n * n)
                .zip(phis.par_iter())
                .for_each(|(buf, (cp, sp))| {
                    for i in 0..n {
                        let ci = cp[i];
                        let si = sp[i];
                        let row = &mut buf[i * n..(i + 1) * n];
                        for (out, (&cj, &sj)) in row.iter_mut().zip(cp.iter().zip(sp.iter())) {
                            *out = ci * cj - si * sj;
                        }
                    }
                });
            let _result: Vec<Vec<Vec<f64>>> = flat
                .par_chunks_exact(n * n)
                .map(|img| img.chunks_exact(n).map(|row| row.to_vec()).collect())
                .collect();
            std::hint::black_box(&_result);
        },
        iters,
    );

    // 10. Full flat pipeline (phi alloc + compute + convert, all parallel)
    #[cfg(feature = "parallel")]
    bench(
        "10. flat full pipeline (par)",
        || {
            use rayon::prelude::*;
            let mut flat = vec![0.0_f64; n_samples * n * n];
            flat.par_chunks_mut(n * n)
                .zip(x.par_iter())
                .for_each(|(buf, sample)| {
                    let x_min = sample.iter().copied().fold(f64::INFINITY, f64::min);
                    let x_max = sample.iter().copied().fold(f64::NEG_INFINITY, f64::max);
                    let scale = (range_max - range_min) / (x_max - x_min);
                    let (cp, sp): (Vec<f64>, Vec<f64>) = sample
                        .iter()
                        .map(|&v| {
                            let c = ((v - x_min) * scale + range_min).clamp(-1.0, 1.0);
                            (c, (1.0 - c * c).max(0.0).sqrt())
                        })
                        .unzip();
                    for i in 0..n {
                        let ci = cp[i];
                        let si = sp[i];
                        let row = &mut buf[i * n..(i + 1) * n];
                        for (out, (&cj, &sj)) in row.iter_mut().zip(cp.iter().zip(sp.iter())) {
                            *out = ci * cj - si * sj;
                        }
                    }
                });
            let _result: Vec<Vec<Vec<f64>>> = flat
                .par_chunks_exact(n * n)
                .map(|img| img.chunks_exact(n).map(|row| row.to_vec()).collect())
                .collect();
            std::hint::black_box(&_result);
        },
        iters,
    );

    // 11. Current Gaf::transform (for direct comparison)
    let config = GafConfig::new();
    bench(
        "11. Gaf::transform (current)",
        || {
            let _r = Gaf::transform(&config, &x);
            std::hint::black_box(&_r);
        },
        iters,
    );
}