p3_field_testing/
bench_func.rs

1use alloc::format;
2use alloc::vec::Vec;
3use core::hint::black_box;
4
5use criterion::{BatchSize, Criterion};
6use p3_field::{Algebra, Field, PrimeCharacteristicRing};
7use rand::distr::StandardUniform;
8use rand::prelude::Distribution;
9use rand::rngs::SmallRng;
10use rand::{Rng, SeedableRng};
11
12/// Not useful for benchmarking prime fields as multiplication is too fast but
13/// handy for extension fields.
14pub fn benchmark_mul<F: Field>(c: &mut Criterion, name: &str)
15where
16    StandardUniform: Distribution<F>,
17{
18    let mut rng = SmallRng::seed_from_u64(1);
19    let x = rng.random::<F>();
20    let y = rng.random::<F>();
21    c.bench_function(&format!("{name} mul"), |b| {
22        b.iter(|| black_box(black_box(x) * black_box(y)));
23    });
24}
25
26pub fn benchmark_square<F: Field>(c: &mut Criterion, name: &str)
27where
28    StandardUniform: Distribution<F>,
29{
30    let mut rng = SmallRng::seed_from_u64(1);
31    let x = rng.random::<F>();
32    c.bench_function(&format!("{name} square"), |b| {
33        b.iter(|| black_box(black_box(x).square()));
34    });
35}
36
37pub fn benchmark_inv<F: Field>(c: &mut Criterion, name: &str)
38where
39    StandardUniform: Distribution<F>,
40{
41    let mut rng = SmallRng::seed_from_u64(1);
42    let x = rng.random::<F>();
43    c.bench_function(&format!("{name} inv"), |b| {
44        b.iter(|| black_box(black_box(x)).inverse());
45    });
46}
47
48pub fn benchmark_mul_2exp<R: PrimeCharacteristicRing + Copy, const REPS: usize>(
49    c: &mut Criterion,
50    name: &str,
51    val: u64,
52) where
53    StandardUniform: Distribution<R>,
54{
55    let mut rng = SmallRng::seed_from_u64(1);
56    let mut input = Vec::new();
57    for _ in 0..REPS {
58        input.push(rng.random::<R>());
59    }
60    c.bench_function(&format!("{name} mul_2exp_u64 {val}"), |b| {
61        b.iter(|| input.iter_mut().for_each(|i| *i = i.mul_2exp_u64(val)));
62    });
63}
64
65pub fn benchmark_halve<F: Field, const REPS: usize>(c: &mut Criterion, name: &str)
66where
67    StandardUniform: Distribution<F>,
68{
69    let mut rng = SmallRng::seed_from_u64(1);
70    let mut input = Vec::new();
71    for _ in 0..REPS {
72        input.push(rng.random::<F>());
73    }
74    c.bench_function(&format!("{name} halve. Num Reps: {REPS}"), |b| {
75        b.iter(|| input.iter_mut().for_each(|i| *i = i.halve()));
76    });
77}
78
79pub fn benchmark_div_2exp<F: Field, const REPS: usize>(c: &mut Criterion, name: &str, val: u64)
80where
81    StandardUniform: Distribution<F>,
82{
83    let mut rng = SmallRng::seed_from_u64(1);
84    let mut input = Vec::new();
85    for _ in 0..REPS {
86        input.push(rng.random::<F>());
87    }
88    c.bench_function(&format!("{name} div_2exp_u64 {val}"), |b| {
89        b.iter(|| input.iter_mut().for_each(|i| *i = i.div_2exp_u64(val)));
90    });
91}
92
93/// Benchmark the time taken to sum an array [[F; N]; REPS] by summing each array
94/// [F; N] using .sum() method and accumulating the sums into an accumulator.
95///
96/// Making N larger and REPS smaller (vs the opposite) leans the benchmark more sensitive towards
97/// the latency (resp throughput) of the sum method.
98pub fn benchmark_iter_sum<R: PrimeCharacteristicRing + Copy, const N: usize, const REPS: usize>(
99    c: &mut Criterion,
100    name: &str,
101) where
102    StandardUniform: Distribution<R>,
103{
104    let mut rng = SmallRng::seed_from_u64(1);
105    let mut input = Vec::new();
106    for _ in 0..REPS {
107        input.push(rng.random::<[R; N]>());
108    }
109    c.bench_function(&format!("{name} sum/{REPS}, {N}"), |b| {
110        b.iter(|| {
111            let mut acc = R::ZERO;
112            for row in &mut input {
113                acc += row.iter().copied().sum();
114            }
115            acc
116        });
117    });
118}
119
120/// Benchmark the time taken to sum an array [[F; N]; REPS] by summing each array
121/// [F; N] using sum_array method and accumulating the sums into an accumulator.
122///
123/// Making N larger and REPS smaller (vs the opposite) leans the benchmark more sensitive towards
124/// the latency (resp throughput) of the sum method.
125pub fn benchmark_sum_array<R: PrimeCharacteristicRing + Copy, const N: usize, const REPS: usize>(
126    c: &mut Criterion,
127    name: &str,
128) where
129    StandardUniform: Distribution<R>,
130{
131    let mut rng = SmallRng::seed_from_u64(1);
132    let mut input = Vec::new();
133    for _ in 0..REPS {
134        input.push(rng.random::<[R; N]>());
135    }
136    c.bench_function(&format!("{name} tree sum/{REPS}, {N}"), |b| {
137        b.iter(|| {
138            let mut acc = R::ZERO;
139            for row in &mut input {
140                acc += R::sum_array::<N>(row);
141            }
142            acc
143        });
144    });
145}
146
147/// Benchmark the time taken to do dot products on a pair of `[R; N]` arrays.
148///
149/// These numbers get more trustworthy as N increases. Small N leads to the
150/// computation being too fast to be measured accurately.
151pub fn benchmark_dot_array<R: PrimeCharacteristicRing + Copy, const N: usize>(
152    c: &mut Criterion,
153    name: &str,
154) where
155    StandardUniform: Distribution<R>,
156{
157    let mut rng = SmallRng::seed_from_u64(1);
158    let lhs = rng.random::<[R; N]>();
159    let rhs = rng.random::<[R; N]>();
160
161    c.bench_function(&format!("{name} dot product/{N}"), |b| {
162        b.iter(|| black_box(R::dot_product(black_box(&lhs), black_box(&rhs))));
163    });
164}
165
166/// Benchmark the time taken to add two slices together.
167pub fn benchmark_add_slices<F: Field, const LENGTH: usize>(c: &mut Criterion, name: &str)
168where
169    StandardUniform: Distribution<F>,
170{
171    let mut rng = SmallRng::seed_from_u64(1);
172    let mut slice_1 = Vec::new();
173    let mut slice_2 = Vec::new();
174    for _ in 0..LENGTH {
175        slice_1.push(rng.random());
176        slice_2.push(rng.random());
177    }
178    c.bench_function(&format!("{name} add slices/{LENGTH}"), |b| {
179        let mut in_slice = slice_1.clone();
180        b.iter(|| {
181            F::add_slices(&mut in_slice, &slice_2);
182        });
183    });
184}
185
186pub fn benchmark_add_latency<R: PrimeCharacteristicRing + Copy, const N: usize>(
187    c: &mut Criterion,
188    name: &str,
189) where
190    StandardUniform: Distribution<R>,
191{
192    c.bench_function(&format!("add-latency/{N} {name}"), |b| {
193        b.iter_batched(
194            || {
195                let mut rng = SmallRng::seed_from_u64(1);
196                let mut vec = Vec::new();
197                for _ in 0..N {
198                    vec.push(rng.random::<R>());
199                }
200                vec
201            },
202            |x| x.iter().fold(R::ZERO, |x, y| x + *y),
203            BatchSize::SmallInput,
204        );
205    });
206}
207
208pub fn benchmark_add_throughput<R: PrimeCharacteristicRing + Copy, const N: usize>(
209    c: &mut Criterion,
210    name: &str,
211) where
212    StandardUniform: Distribution<R>,
213{
214    c.bench_function(&format!("add-throughput/{N} {name}"), |b| {
215        b.iter_batched(
216            || {
217                let mut rng = SmallRng::seed_from_u64(1);
218                (
219                    rng.random::<R>(),
220                    rng.random::<R>(),
221                    rng.random::<R>(),
222                    rng.random::<R>(),
223                    rng.random::<R>(),
224                    rng.random::<R>(),
225                    rng.random::<R>(),
226                    rng.random::<R>(),
227                    rng.random::<R>(),
228                    rng.random::<R>(),
229                )
230            },
231            |(mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h, mut i, mut j)| {
232                for _ in 0..N {
233                    (a, b, c, d, e, f, g, h, i, j) = (
234                        a + b,
235                        b + c,
236                        c + d,
237                        d + e,
238                        e + f,
239                        f + g,
240                        g + h,
241                        h + i,
242                        i + j,
243                        j + a,
244                    );
245                }
246                (a, b, c, d, e, f, g, h, i, j)
247            },
248            BatchSize::SmallInput,
249        );
250    });
251}
252
253pub fn benchmark_sub_latency<R: PrimeCharacteristicRing + Copy, const N: usize>(
254    c: &mut Criterion,
255    name: &str,
256) where
257    StandardUniform: Distribution<R>,
258{
259    c.bench_function(&format!("sub-latency/{N} {name}"), |b| {
260        b.iter_batched(
261            || {
262                let mut rng = SmallRng::seed_from_u64(1);
263                let mut vec = Vec::new();
264                for _ in 0..N {
265                    vec.push(rng.random::<R>());
266                }
267                vec
268            },
269            |x| x.iter().fold(R::ZERO, |x, y| x - *y),
270            BatchSize::SmallInput,
271        );
272    });
273}
274
275pub fn benchmark_sub_throughput<R: PrimeCharacteristicRing + Copy, const N: usize>(
276    c: &mut Criterion,
277    name: &str,
278) where
279    StandardUniform: Distribution<R>,
280{
281    c.bench_function(&format!("sub-throughput/{N} {name}"), |b| {
282        b.iter_batched(
283            || {
284                let mut rng = SmallRng::seed_from_u64(1);
285                (
286                    rng.random::<R>(),
287                    rng.random::<R>(),
288                    rng.random::<R>(),
289                    rng.random::<R>(),
290                    rng.random::<R>(),
291                    rng.random::<R>(),
292                    rng.random::<R>(),
293                    rng.random::<R>(),
294                    rng.random::<R>(),
295                    rng.random::<R>(),
296                )
297            },
298            |(mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h, mut i, mut j)| {
299                for _ in 0..N {
300                    (a, b, c, d, e, f, g, h, i, j) = (
301                        a - b,
302                        b - c,
303                        c - d,
304                        d - e,
305                        e - f,
306                        f - g,
307                        g - h,
308                        h - i,
309                        i - j,
310                        j - a,
311                    );
312                }
313                (a, b, c, d, e, f, g, h, i, j)
314            },
315            BatchSize::SmallInput,
316        );
317    });
318}
319
320pub fn benchmark_mul_latency<R: PrimeCharacteristicRing + Copy, const N: usize>(
321    c: &mut Criterion,
322    name: &str,
323) where
324    StandardUniform: Distribution<R>,
325{
326    c.bench_function(&format!("mul-latency/{N} {name}"), |b| {
327        b.iter_batched(
328            || {
329                let mut rng = SmallRng::seed_from_u64(1);
330                let mut vec = Vec::new();
331                for _ in 0..N {
332                    vec.push(rng.random::<R>());
333                }
334                vec
335            },
336            |x| x.iter().fold(R::ONE, |x, y| x * *y),
337            BatchSize::SmallInput,
338        );
339    });
340}
341
342pub fn benchmark_mul_throughput<R: PrimeCharacteristicRing + Copy, const N: usize>(
343    c: &mut Criterion,
344    name: &str,
345) where
346    StandardUniform: Distribution<R>,
347{
348    c.bench_function(&format!("mul-throughput/{N} {name}"), |b| {
349        b.iter_batched(
350            || {
351                let mut rng = SmallRng::seed_from_u64(1);
352                (
353                    rng.random::<R>(),
354                    rng.random::<R>(),
355                    rng.random::<R>(),
356                    rng.random::<R>(),
357                    rng.random::<R>(),
358                    rng.random::<R>(),
359                    rng.random::<R>(),
360                    rng.random::<R>(),
361                    rng.random::<R>(),
362                    rng.random::<R>(),
363                )
364            },
365            |(mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h, mut i, mut j)| {
366                for _ in 0..N {
367                    (a, b, c, d, e, f, g, h, i, j) = (
368                        a * b,
369                        b * c,
370                        c * d,
371                        d * e,
372                        e * f,
373                        f * g,
374                        g * h,
375                        h * i,
376                        i * j,
377                        j * a,
378                    );
379                }
380                (a, b, c, d, e, f, g, h, i, j)
381            },
382            BatchSize::SmallInput,
383        );
384    });
385}
386
387pub fn benchmark_base_mul_latency<F: Field, A: Algebra<F> + Copy, const N: usize>(
388    c: &mut Criterion,
389    name: &str,
390) where
391    StandardUniform: Distribution<F> + Distribution<A>,
392{
393    c.bench_function(&format!("base_mul-latency/{N} {name}"), |b| {
394        b.iter_batched(
395            || {
396                let mut rng = SmallRng::seed_from_u64(1);
397                let mut vec = Vec::new();
398                for _ in 0..N {
399                    vec.push(rng.random::<F>());
400                }
401                let init_val = rng.random::<A>();
402                (vec, init_val)
403            },
404            |(x, init_val)| x.iter().fold(init_val, |x, y| x * *y),
405            BatchSize::SmallInput,
406        );
407    });
408}
409
410pub fn benchmark_base_mul_throughput<F: Field, A: Algebra<F> + Copy, const N: usize>(
411    c: &mut Criterion,
412    name: &str,
413) where
414    StandardUniform: Distribution<F> + Distribution<A>,
415{
416    c.bench_function(&format!("base_mul-throughput/{N} {name}"), |b| {
417        b.iter_batched(
418            || {
419                let mut rng = SmallRng::seed_from_u64(1);
420                let a_tuple = (
421                    rng.random::<A>(),
422                    rng.random::<A>(),
423                    rng.random::<A>(),
424                    rng.random::<A>(),
425                    rng.random::<A>(),
426                    rng.random::<A>(),
427                    rng.random::<A>(),
428                    rng.random::<A>(),
429                    rng.random::<A>(),
430                    rng.random::<A>(),
431                );
432                let f_tuple = (
433                    rng.random::<F>(),
434                    rng.random::<F>(),
435                    rng.random::<F>(),
436                    rng.random::<F>(),
437                    rng.random::<F>(),
438                    rng.random::<F>(),
439                    rng.random::<F>(),
440                    rng.random::<F>(),
441                    rng.random::<F>(),
442                    rng.random::<F>(),
443                );
444                (a_tuple, f_tuple)
445            },
446            |(
447                (mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h, mut i, mut j),
448                (a_f, b_f, c_f, d_f, e_f, f_f, g_f, h_f, i_f, j_f),
449            )| {
450                for _ in 0..N {
451                    (a, b, c, d, e, f, g, h, i, j) = (
452                        a * a_f,
453                        b * b_f,
454                        c * c_f,
455                        d * d_f,
456                        e * e_f,
457                        f * f_f,
458                        g * g_f,
459                        h * h_f,
460                        i * i_f,
461                        j * j_f,
462                    );
463                }
464                (a, b, c, d, e, f, g, h, i, j)
465            },
466            BatchSize::SmallInput,
467        );
468    });
469}
470
471/// Benchmarks the `exp_const_u64` implementation for a given `POWER`.
472///
473/// This function measures the throughput of the exponentiation by applying the operation
474/// to a vector of `REPS` random elements.
475pub fn benchmark_exp_const<R: PrimeCharacteristicRing + Copy, const POWER: u64, const REPS: usize>(
476    c: &mut Criterion,
477    name: &str,
478) where
479    StandardUniform: Distribution<R>,
480{
481    let mut rng = SmallRng::seed_from_u64(1);
482    let input: Vec<R> = (0..REPS).map(|_| rng.random()).collect();
483
484    c.bench_function(&format!("{name} exp_const<{POWER}>/{REPS}"), |b| {
485        b.iter_batched(
486            || input.clone(),
487            |mut data| {
488                for x in data.iter_mut() {
489                    *x = x.exp_const_u64::<POWER>();
490                }
491                black_box(data);
492            },
493            BatchSize::SmallInput,
494        );
495    });
496}