p3-field-testing 0.3.0-succinct

use alloc::format;
use alloc::vec::Vec;

use criterion::{black_box, BatchSize, Criterion};
use p3_field::Field;
use rand::distributions::Standard;
use rand::prelude::Distribution;
use rand::Rng;

pub fn benchmark_square<F: Field>(c: &mut Criterion, name: &str)
where
    Standard: Distribution<F>,
{
    let mut rng = rand::thread_rng();
    let x = rng.gen::<F>();
    c.bench_function(&format!("{} square", name), |b| {
        b.iter(|| black_box(black_box(x).square()))
    });
}

pub fn benchmark_inv<F: Field>(c: &mut Criterion, name: &str)
where
    Standard: Distribution<F>,
{
    let mut rng = rand::thread_rng();
    let x = rng.gen::<F>();
    c.bench_function(&format!("{} inv", name), |b| {
        b.iter(|| black_box(black_box(x)).inverse())
    });
}

pub fn benchmark_mul<F: Field>(c: &mut Criterion, name: &str)
where
    Standard: Distribution<F>,
{
    let mut rng = rand::thread_rng();
    let x = rng.gen::<F>();
    let y = rng.gen::<F>();
    c.bench_function(&format!("{} mul", name), |b| {
        b.iter(|| black_box(black_box(x) * black_box(y)))
    });
}

/// Benchmark the time taken to sum an array [F; N] using .sum() method.
/// Repeat the summation REPS times.
pub fn benchmark_iter_sum<F: Field, const N: usize, const REPS: usize>(
    c: &mut Criterion,
    name: &str,
) where
    Standard: Distribution<F>,
{
    let mut rng = rand::thread_rng();
    let mut input = Vec::new();
    for _ in 0..REPS {
        input.push(rng.gen::<[F; N]>())
    }
    let mut output = [F::zero(); REPS];
    c.bench_function(&format!("{} sum/{}, {}", name, REPS, N), |b| {
        b.iter(|| {
            for i in 0..REPS {
                output[i] = input[i].iter().cloned().sum()
            }
            output
        })
    });
}

pub fn benchmark_add_latency<F: Field, const N: usize>(c: &mut Criterion, name: &str)
where
    Standard: Distribution<F>,
{
    c.bench_function(&format!("{} add-latency/{}", name, N), |b| {
        b.iter_batched(
            || {
                let mut rng = rand::thread_rng();
                let mut vec = Vec::new();
                for _ in 0..N {
                    vec.push(rng.gen::<F>())
                }
                vec
            },
            |x| x.iter().fold(F::zero(), |x, y| x + *y),
            BatchSize::SmallInput,
        )
    });
}

pub fn benchmark_add_throughput<F: Field, const N: usize>(c: &mut Criterion, name: &str)
where
    Standard: Distribution<F>,
{
    c.bench_function(&format!("{} add-throughput/{}", name, N), |b| {
        b.iter_batched(
            || {
                let mut rng = rand::thread_rng();
                (
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                )
            },
            |(mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h, mut i, mut j)| {
                for _ in 0..N {
                    (a, b, c, d, e, f, g, h, i, j) = (
                        a + b,
                        b + c,
                        c + d,
                        d + e,
                        e + f,
                        f + g,
                        g + h,
                        h + i,
                        i + j,
                        j + a,
                    );
                }
                (a, b, c, d, e, f, g, h, i, j)
            },
            BatchSize::SmallInput,
        )
    });
}

pub fn benchmark_sub_latency<F: Field, const N: usize>(c: &mut Criterion, name: &str)
where
    Standard: Distribution<F>,
{
    c.bench_function(&format!("{} sub-latency/{}", name, N), |b| {
        b.iter_batched(
            || {
                let mut rng = rand::thread_rng();
                let mut vec = Vec::new();
                for _ in 0..N {
                    vec.push(rng.gen::<F>())
                }
                vec
            },
            |x| x.iter().fold(F::zero(), |x, y| x - *y),
            BatchSize::SmallInput,
        )
    });
}

pub fn benchmark_sub_throughput<F: Field, const N: usize>(c: &mut Criterion, name: &str)
where
    Standard: Distribution<F>,
{
    c.bench_function(&format!("{} sub-throughput/{}", name, N), |b| {
        b.iter_batched(
            || {
                let mut rng = rand::thread_rng();
                (
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                    rng.gen::<F>(),
                )
            },
            |(mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h, mut i, mut j)| {
                for _ in 0..N {
                    (a, b, c, d, e, f, g, h, i, j) = (
                        a - b,
                        b - c,
                        c - d,
                        d - e,
                        e - f,
                        f - g,
                        g - h,
                        h - i,
                        i - j,
                        j - a,
                    );
                }
                (a, b, c, d, e, f, g, h, i, j)
            },
            BatchSize::SmallInput,
        )
    });
}