p3_field_testing/
bench_func.rs

1use alloc::format;
2use alloc::vec::Vec;
3use core::hint::black_box;
4
5use criterion::{BatchSize, Criterion};
6use p3_field::{Field, PrimeCharacteristicRing};
7use rand::distr::StandardUniform;
8use rand::prelude::Distribution;
9use rand::rngs::SmallRng;
10use rand::{Rng, SeedableRng};
11
12/// Not useful for benchmarking prime fields as multiplication is too fast but
13/// handy for extension fields.
14pub fn benchmark_mul<F: Field>(c: &mut Criterion, name: &str)
15where
16    StandardUniform: Distribution<F>,
17{
18    let mut rng = SmallRng::seed_from_u64(1);
19    let x = rng.random::<F>();
20    let y = rng.random::<F>();
21    c.bench_function(&format!("{} mul", name), |b| {
22        b.iter(|| black_box(black_box(x) * black_box(y)))
23    });
24}
25
26pub fn benchmark_square<F: Field>(c: &mut Criterion, name: &str)
27where
28    StandardUniform: Distribution<F>,
29{
30    let mut rng = SmallRng::seed_from_u64(1);
31    let x = rng.random::<F>();
32    c.bench_function(&format!("{} square", name), |b| {
33        b.iter(|| black_box(black_box(x).square()))
34    });
35}
36
37pub fn benchmark_inv<F: Field>(c: &mut Criterion, name: &str)
38where
39    StandardUniform: Distribution<F>,
40{
41    let mut rng = SmallRng::seed_from_u64(1);
42    let x = rng.random::<F>();
43    c.bench_function(&format!("{} inv", name), |b| {
44        b.iter(|| black_box(black_box(x)).inverse())
45    });
46}
47
48pub fn benchmark_mul_2exp<R: PrimeCharacteristicRing + Copy, const REPS: usize>(
49    c: &mut Criterion,
50    name: &str,
51    val: u64,
52) where
53    StandardUniform: Distribution<R>,
54{
55    let mut rng = SmallRng::seed_from_u64(1);
56    let mut input = Vec::new();
57    for _ in 0..REPS {
58        input.push(rng.random::<R>())
59    }
60    c.bench_function(&format!("{} mul_2exp_u64 {}", name, val), |b| {
61        b.iter(|| input.iter_mut().for_each(|i| *i = i.mul_2exp_u64(val)))
62    });
63}
64
65pub fn benchmark_div_2exp<F: Field, const REPS: usize>(c: &mut Criterion, name: &str, val: u64)
66where
67    StandardUniform: Distribution<F>,
68{
69    let mut rng = SmallRng::seed_from_u64(1);
70    let mut input = Vec::new();
71    for _ in 0..REPS {
72        input.push(rng.random::<F>())
73    }
74    c.bench_function(&format!("{} div_2exp_u64 {}", name, val), |b| {
75        b.iter(|| input.iter_mut().for_each(|i| *i = i.div_2exp_u64(val)))
76    });
77}
78
79/// Benchmark the time taken to sum an array [[F; N]; REPS] by summing each array
80/// [F; N] using .sum() method and accumulating the sums into an accumulator.
81///
82/// Making N larger and REPS smaller (vs the opposite) leans the benchmark more sensitive towards
83/// the latency (resp throughput) of the sum method.
84pub fn benchmark_iter_sum<R: PrimeCharacteristicRing + Copy, const N: usize, const REPS: usize>(
85    c: &mut Criterion,
86    name: &str,
87) where
88    StandardUniform: Distribution<R>,
89{
90    let mut rng = SmallRng::seed_from_u64(1);
91    let mut input = Vec::new();
92    for _ in 0..REPS {
93        input.push(rng.random::<[R; N]>())
94    }
95    c.bench_function(&format!("{} sum/{}, {}", name, REPS, N), |b| {
96        b.iter(|| {
97            let mut acc = R::ZERO;
98            for row in &mut input {
99                acc += row.iter().copied().sum()
100            }
101            acc
102        })
103    });
104}
105
106/// Benchmark the time taken to sum an array [[F; N]; REPS] by summing each array
107/// [F; N] using sum_array method and accumulating the sums into an accumulator.
108///
109/// Making N larger and REPS smaller (vs the opposite) leans the benchmark more sensitive towards
110/// the latency (resp throughput) of the sum method.
111pub fn benchmark_sum_array<R: PrimeCharacteristicRing + Copy, const N: usize, const REPS: usize>(
112    c: &mut Criterion,
113    name: &str,
114) where
115    StandardUniform: Distribution<R>,
116{
117    let mut rng = SmallRng::seed_from_u64(1);
118    let mut input = Vec::new();
119    for _ in 0..REPS {
120        input.push(rng.random::<[R; N]>())
121    }
122    c.bench_function(&format!("{} tree sum/{}, {}", name, REPS, N), |b| {
123        b.iter(|| {
124            let mut acc = R::ZERO;
125            for row in &mut input {
126                acc += R::sum_array::<N>(row)
127            }
128            acc
129        })
130    });
131}
132
133/// Benchmark the time taken to do dot products on a pair of `[R; N]` arrays.
134///
135/// These numbers get more trustworthy as N increases. Small N leads to the
136/// computation being too fast to be measured accurately.
137pub fn benchmark_dot_array<R: PrimeCharacteristicRing + Copy, const N: usize>(
138    c: &mut Criterion,
139    name: &str,
140) where
141    StandardUniform: Distribution<R>,
142{
143    let mut rng = SmallRng::seed_from_u64(1);
144    let lhs = rng.random::<[R; N]>();
145    let rhs = rng.random::<[R; N]>();
146
147    c.bench_function(&format!("{} dot product/{}", name, N), |b| {
148        b.iter(|| black_box(R::dot_product(black_box(&lhs), black_box(&rhs))))
149    });
150}
151
152/// Benchmark the time taken to add two slices together.
153pub fn benchmark_add_slices<F: Field, const LENGTH: usize>(c: &mut Criterion, name: &str)
154where
155    StandardUniform: Distribution<F>,
156{
157    let mut rng = SmallRng::seed_from_u64(1);
158    let mut slice_1 = Vec::new();
159    let mut slice_2 = Vec::new();
160    for _ in 0..LENGTH {
161        slice_1.push(rng.random());
162        slice_2.push(rng.random());
163    }
164    c.bench_function(&format!("{} add slices/{}", name, LENGTH), |b| {
165        let mut in_slice = slice_1.clone();
166        b.iter(|| {
167            F::add_slices(&mut in_slice, &slice_2);
168        })
169    });
170}
171
172pub fn benchmark_add_latency<R: PrimeCharacteristicRing + Copy, const N: usize>(
173    c: &mut Criterion,
174    name: &str,
175) where
176    StandardUniform: Distribution<R>,
177{
178    c.bench_function(&format!("add-latency/{} {}", N, name), |b| {
179        b.iter_batched(
180            || {
181                let mut rng = SmallRng::seed_from_u64(1);
182                let mut vec = Vec::new();
183                for _ in 0..N {
184                    vec.push(rng.random::<R>())
185                }
186                vec
187            },
188            |x| x.iter().fold(R::ZERO, |x, y| x + *y),
189            BatchSize::SmallInput,
190        )
191    });
192}
193
194pub fn benchmark_add_throughput<R: PrimeCharacteristicRing + Copy, const N: usize>(
195    c: &mut Criterion,
196    name: &str,
197) where
198    StandardUniform: Distribution<R>,
199{
200    c.bench_function(&format!("add-throughput/{} {}", N, name), |b| {
201        b.iter_batched(
202            || {
203                let mut rng = SmallRng::seed_from_u64(1);
204                (
205                    rng.random::<R>(),
206                    rng.random::<R>(),
207                    rng.random::<R>(),
208                    rng.random::<R>(),
209                    rng.random::<R>(),
210                    rng.random::<R>(),
211                    rng.random::<R>(),
212                    rng.random::<R>(),
213                    rng.random::<R>(),
214                    rng.random::<R>(),
215                )
216            },
217            |(mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h, mut i, mut j)| {
218                for _ in 0..N {
219                    (a, b, c, d, e, f, g, h, i, j) = (
220                        a + b,
221                        b + c,
222                        c + d,
223                        d + e,
224                        e + f,
225                        f + g,
226                        g + h,
227                        h + i,
228                        i + j,
229                        j + a,
230                    );
231                }
232                (a, b, c, d, e, f, g, h, i, j)
233            },
234            BatchSize::SmallInput,
235        )
236    });
237}
238
239pub fn benchmark_sub_latency<R: PrimeCharacteristicRing + Copy, const N: usize>(
240    c: &mut Criterion,
241    name: &str,
242) where
243    StandardUniform: Distribution<R>,
244{
245    c.bench_function(&format!("sub-latency/{} {}", N, name), |b| {
246        b.iter_batched(
247            || {
248                let mut rng = SmallRng::seed_from_u64(1);
249                let mut vec = Vec::new();
250                for _ in 0..N {
251                    vec.push(rng.random::<R>())
252                }
253                vec
254            },
255            |x| x.iter().fold(R::ZERO, |x, y| x - *y),
256            BatchSize::SmallInput,
257        )
258    });
259}
260
261pub fn benchmark_sub_throughput<R: PrimeCharacteristicRing + Copy, const N: usize>(
262    c: &mut Criterion,
263    name: &str,
264) where
265    StandardUniform: Distribution<R>,
266{
267    c.bench_function(&format!("sub-throughput/{} {}", N, name), |b| {
268        b.iter_batched(
269            || {
270                let mut rng = SmallRng::seed_from_u64(1);
271                (
272                    rng.random::<R>(),
273                    rng.random::<R>(),
274                    rng.random::<R>(),
275                    rng.random::<R>(),
276                    rng.random::<R>(),
277                    rng.random::<R>(),
278                    rng.random::<R>(),
279                    rng.random::<R>(),
280                    rng.random::<R>(),
281                    rng.random::<R>(),
282                )
283            },
284            |(mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h, mut i, mut j)| {
285                for _ in 0..N {
286                    (a, b, c, d, e, f, g, h, i, j) = (
287                        a - b,
288                        b - c,
289                        c - d,
290                        d - e,
291                        e - f,
292                        f - g,
293                        g - h,
294                        h - i,
295                        i - j,
296                        j - a,
297                    );
298                }
299                (a, b, c, d, e, f, g, h, i, j)
300            },
301            BatchSize::SmallInput,
302        )
303    });
304}
305
306pub fn benchmark_mul_latency<R: PrimeCharacteristicRing + Copy, const N: usize>(
307    c: &mut Criterion,
308    name: &str,
309) where
310    StandardUniform: Distribution<R>,
311{
312    c.bench_function(&format!("mul-latency/{} {}", N, name), |b| {
313        b.iter_batched(
314            || {
315                let mut rng = SmallRng::seed_from_u64(1);
316                let mut vec = Vec::new();
317                for _ in 0..N {
318                    vec.push(rng.random::<R>())
319                }
320                vec
321            },
322            |x| x.iter().fold(R::ONE, |x, y| x * *y),
323            BatchSize::SmallInput,
324        )
325    });
326}
327
328pub fn benchmark_mul_throughput<R: PrimeCharacteristicRing + Copy, const N: usize>(
329    c: &mut Criterion,
330    name: &str,
331) where
332    StandardUniform: Distribution<R>,
333{
334    c.bench_function(&format!("mul-throughput/{} {}", N, name), |b| {
335        b.iter_batched(
336            || {
337                let mut rng = SmallRng::seed_from_u64(1);
338                (
339                    rng.random::<R>(),
340                    rng.random::<R>(),
341                    rng.random::<R>(),
342                    rng.random::<R>(),
343                    rng.random::<R>(),
344                    rng.random::<R>(),
345                    rng.random::<R>(),
346                    rng.random::<R>(),
347                    rng.random::<R>(),
348                    rng.random::<R>(),
349                )
350            },
351            |(mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h, mut i, mut j)| {
352                for _ in 0..N {
353                    (a, b, c, d, e, f, g, h, i, j) = (
354                        a * b,
355                        b * c,
356                        c * d,
357                        d * e,
358                        e * f,
359                        f * g,
360                        g * h,
361                        h * i,
362                        i * j,
363                        j * a,
364                    );
365                }
366                (a, b, c, d, e, f, g, h, i, j)
367            },
368            BatchSize::SmallInput,
369        )
370    });
371}