physics_in_parallel 3.0.3

High-performance infrastructure for numerical simulations in physics
Documentation
use std::hint::black_box;
use std::time::{Duration, Instant};

use physics_in_parallel::math::tensor::{RandType, RngKind, TensorRandFiller, TensorTrait, dense};
use rand::SeedableRng;
use rand::rngs::SmallRng;
use rand_chacha::{ChaCha8Rng, ChaCha12Rng, ChaCha20Rng};
use rand_distr::{Distribution, Uniform};
use rand_pcg::{Pcg64, Pcg64Mcg};

const DEFAULT_LEN: usize = 120_000_000;
const DEFAULT_REPEATS: usize = 3;
const SEED: u64 = 0x5EED_1234;
const DEFAULT_RNG_KINDS: &[RngKind] = &[RngKind::Pcg64, RngKind::Pcg64Mcg, RngKind::SmallRng];

fn main() {
    let len = parse_arg(1).unwrap_or(DEFAULT_LEN);
    let repeats = parse_arg(2).unwrap_or(DEFAULT_REPEATS);
    let rngs = parse_arg(3).unwrap_or_else(rayon::current_num_threads);
    let rng_kinds = parse_rng_kinds(4).unwrap_or_else(|| DEFAULT_RNG_KINDS.to_vec());
    let kind = RandType::Uniform {
        low: -1.0,
        high: 1.0,
    };

    println!("tensor random fill benchmark");
    println!("elements: {len}");
    println!("bytes per array: {}", bytes_human(len * size_of::<f64>()));
    println!("repeats: {repeats}");
    println!("rayon threads available: {}", rayon::current_num_threads());
    println!("rayon threads: {rngs}");
    println!(
        "rng kinds: {}",
        rng_kinds
            .iter()
            .map(|kind| kind.name())
            .collect::<Vec<_>>()
            .join(", ")
    );
    println!();

    println!(
        "{:<12} {:>14} {:>14} {:>14} {:>14} {:>12} {:>12}",
        "rng", "tensor best", "tensor avg", "seq best", "seq avg", "best x", "avg x"
    );
    println!("{}", "-".repeat(100));

    for rng_kind in rng_kinds {
        let tensor = benchmark_tensor_fill(len, repeats, kind, rngs, rng_kind);
        let sequential = benchmark_sequential_vec_fill(len, repeats, rng_kind);
        report(rng_kind, &tensor, &sequential);
    }
}

#[derive(Debug, Clone, Copy)]
struct Timing {
    best: Duration,
    average: Duration,
    checksum: f64,
}

fn benchmark_tensor_fill(
    len: usize,
    repeats: usize,
    kind: RandType,
    rngs: usize,
    rng_kind: RngKind,
) -> Timing {
    let mut tensor = dense::Tensor::<f64>::empty(&[len]);
    let mut filler =
        TensorRandFiller::new_with_seed_and_rng_kind(kind, Some(rngs), SEED, Some(rng_kind));

    filler.refresh(&mut tensor);

    let timings = (0..repeats)
        .map(|_| {
            let start = Instant::now();
            filler.refresh(&mut tensor);
            start.elapsed()
        })
        .collect::<Vec<_>>();

    Timing {
        best: *timings.iter().min().expect("at least one repeat"),
        average: average(&timings),
        checksum: black_box(tensor.get_sum()),
    }
}

fn benchmark_sequential_vec_fill(len: usize, repeats: usize, rng_kind: RngKind) -> Timing {
    match rng_kind {
        RngKind::SmallRng => {
            benchmark_sequential_vec_fill_with_rng(len, repeats, SmallRng::seed_from_u64(SEED))
        }
        RngKind::Pcg64Mcg => {
            benchmark_sequential_vec_fill_with_rng(len, repeats, Pcg64Mcg::seed_from_u64(SEED))
        }
        RngKind::Pcg64 => {
            benchmark_sequential_vec_fill_with_rng(len, repeats, Pcg64::seed_from_u64(SEED))
        }
        RngKind::ChaCha8 => {
            benchmark_sequential_vec_fill_with_rng(len, repeats, ChaCha8Rng::seed_from_u64(SEED))
        }
        RngKind::ChaCha12 => {
            benchmark_sequential_vec_fill_with_rng(len, repeats, ChaCha12Rng::seed_from_u64(SEED))
        }
        RngKind::ChaCha20 => {
            benchmark_sequential_vec_fill_with_rng(len, repeats, ChaCha20Rng::seed_from_u64(SEED))
        }
    }
}

fn benchmark_sequential_vec_fill_with_rng<R>(len: usize, repeats: usize, mut rng: R) -> Timing
where
    R: rand::Rng,
{
    let dist = Uniform::new(-1.0, 1.0).expect("valid uniform bounds");
    let mut data = vec![0.0; len];

    fill_sequential_with_rng(&mut data, &dist, &mut rng);

    let timings = (0..repeats)
        .map(|_| {
            let start = Instant::now();
            fill_sequential_with_rng(&mut data, &dist, &mut rng);
            start.elapsed()
        })
        .collect::<Vec<_>>();

    Timing {
        best: *timings.iter().min().expect("at least one repeat"),
        average: average(&timings),
        checksum: checksum(&data),
    }
}

fn fill_sequential_with_rng<R>(data: &mut [f64], dist: &Uniform<f64>, rng: &mut R)
where
    R: rand::Rng + ?Sized,
{
    for x in data {
        *x = dist.sample(rng);
    }
}

fn checksum(data: &[f64]) -> f64 {
    data.iter().copied().sum::<f64>().pipe(black_box)
}

fn average(times: &[Duration]) -> Duration {
    let total = times.iter().map(Duration::as_secs_f64).sum::<f64>();
    Duration::from_secs_f64(total / times.len() as f64)
}

fn report(rng_kind: RngKind, tensor: &Timing, sequential: &Timing) {
    println!(
        "{:<12} {:>14} {:>14} {:>14} {:>14} {:>12.3} {:>12.3}",
        rng_kind.name(),
        millis(tensor.best),
        millis(tensor.average),
        millis(sequential.best),
        millis(sequential.average),
        sequential.best.as_secs_f64() / tensor.best.as_secs_f64(),
        sequential.average.as_secs_f64() / tensor.average.as_secs_f64()
    );
    println!(
        "{:<12} tensor checksum {:>16.6}, sequential checksum {:>16.6}",
        "", tensor.checksum, sequential.checksum
    );
}

fn millis(duration: Duration) -> String {
    format!("{:.3} ms", duration.as_secs_f64() * 1_000.0)
}

fn parse_arg(position: usize) -> Option<usize> {
    std::env::args().nth(position)?.parse().ok()
}

fn parse_rng_kinds(position: usize) -> Option<Vec<RngKind>> {
    let arg = std::env::args().nth(position)?;
    if arg.eq_ignore_ascii_case("all") {
        return Some(vec![
            RngKind::Pcg64,
            RngKind::Pcg64Mcg,
            RngKind::SmallRng,
            RngKind::ChaCha8,
            RngKind::ChaCha12,
            RngKind::ChaCha20,
        ]);
    }

    let kinds = arg
        .split(',')
        .filter_map(|name| RngKind::from_name(name.trim()))
        .collect::<Vec<_>>();

    if kinds.is_empty() { None } else { Some(kinds) }
}

fn bytes_human(bytes: usize) -> String {
    const MIB: f64 = 1024.0 * 1024.0;
    const GIB: f64 = 1024.0 * 1024.0 * 1024.0;
    let bytes = bytes as f64;
    if bytes >= GIB {
        format!("{:.2} GiB", bytes / GIB)
    } else {
        format!("{:.2} MiB", bytes / MIB)
    }
}

trait Pipe: Sized {
    fn pipe<R>(self, f: impl FnOnce(Self) -> R) -> R {
        f(self)
    }
}

impl<T> Pipe for T {}