use crate::{
    cfg_chunks_mut,
    cfg_into_iter,
    cfg_iter,
    cfg_iter_mut,
    fft::{DomainCoeff, SparsePolynomial},
};
use snarkvm_fields::{FftField, FftParameters, Field, batch_inversion};
#[cfg(not(feature = "serial"))]
use snarkvm_utilities::max_available_threads;
use snarkvm_utilities::{execute_with_max_available_threads, serialize::*};

use rand::Rng;
use std::{borrow::Cow, fmt};

use anyhow::{Result, ensure};

#[cfg(not(feature = "serial"))]
use rayon::prelude::*;

#[cfg(feature = "serial")]
use itertools::Itertools;

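/// Returns the ceiling of the base-2 logarithm of `x`, with `log2(0)` defined as `0`.
///
/// A hand-checked sketch of the behavior (not part of the original source):
///
/// ```ignore
/// assert_eq!(log2(1), 0);
/// assert_eq!(log2(16), 4); // exact for powers of two
/// assert_eq!(log2(17), 5); // rounded up otherwise
/// ```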
pub fn log2(x: usize) -> u32 {
    if x == 0 {
        0
    } else if x.is_power_of_two() {
        1usize.leading_zeros() - x.leading_zeros()
    } else {
        0usize.leading_zeros() - x.leading_zeros()
    }
}

#[allow(unused)]
#[cfg(not(feature = "serial"))]
const MIN_PARALLEL_CHUNK_SIZE: usize = 1 << 7;

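/// A multiplicative subgroup of a field `F`, of order `2^k`, over which one can
/// perform radix-2 (i)FFTs.
///
/// A rough construction sketch (assuming the `bls12_377::Fr` field used in the
/// tests below; not part of the original source):
///
/// ```ignore
/// let domain = EvaluationDomain::<Fr>::new(5).unwrap(); // rounds 5 up to size 8
/// assert_eq!(domain.size(), 8);
/// ```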
#[derive(Copy, Clone, Hash, Eq, PartialEq, CanonicalSerialize, CanonicalDeserialize)]
pub struct EvaluationDomain<F: FftField> {
    /// The size of the domain.
    pub size: u64,
    /// `log_2(self.size)`.
    pub log_size_of_group: u32,
    /// The size of the domain as a field element.
    pub size_as_field_element: F,
    /// The inverse of the size in the field.
    pub size_inv: F,
    /// A generator of the subgroup.
    pub group_gen: F,
    /// The inverse of the subgroup generator.
    pub group_gen_inv: F,
    /// The inverse of the multiplicative generator of the field.
    pub generator_inv: F,
}

impl<F: FftField> fmt::Debug for EvaluationDomain<F> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Multiplicative subgroup of size {}", self.size)
    }
}

impl<F: FftField> EvaluationDomain<F> {
    /// Samples a uniformly random field element that does not lie in the domain.
    pub fn sample_element_outside_domain<R: Rng>(&self, rng: &mut R) -> F {
        let mut t = F::rand(rng);
        while self.evaluate_vanishing_polynomial(t).is_zero() {
            t = F::rand(rng);
        }
        t
    }

    /// Constructs a domain large enough to evaluate polynomials with `num_coeffs`
    /// coefficients, i.e. of size `num_coeffs.next_power_of_two()`.
    /// Returns `None` if the field has no root of unity of that order.
    pub fn new(num_coeffs: usize) -> Option<Self> {
        let size = num_coeffs.checked_next_power_of_two()? as u64;
        let log_size_of_group = size.trailing_zeros();

        if log_size_of_group > F::FftParameters::TWO_ADICITY {
            return None;
        }

        let group_gen = F::get_root_of_unity(size as usize)?;

        debug_assert_eq!(group_gen.pow([size]), F::one());

        let size_as_field_element = F::from(size);
        let size_inv = size_as_field_element.inverse()?;

        Some(EvaluationDomain {
            size,
            log_size_of_group,
            size_as_field_element,
            size_inv,
            group_gen,
            group_gen_inv: group_gen.inverse()?,
            generator_inv: F::multiplicative_generator().inverse()?,
        })
    }

    /// Returns the size of the domain that `Self::new(num_coeffs)` would produce,
    /// without constructing it.
    pub fn compute_size_of_domain(num_coeffs: usize) -> Option<usize> {
        let size = num_coeffs.checked_next_power_of_two()?;
        if size.trailing_zeros() <= F::FftParameters::TWO_ADICITY { Some(size) } else { None }
    }

    /// Returns the size of the domain.
    pub fn size(&self) -> usize {
        self.size as usize
    }

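    /// Evaluates `coeffs` over the whole domain, zero-padding to the domain size.
    /// This is the inverse of [`Self::ifft`].
    ///
    /// A rough round-trip sketch (assuming `bls12_377::Fr`, as in the tests below;
    /// not part of the original source):
    ///
    /// ```ignore
    /// let domain = EvaluationDomain::<Fr>::new(4).unwrap();
    /// let coeffs = vec![Fr::one(); 4];
    /// let evals = domain.fft(&coeffs);
    /// assert_eq!(domain.ifft(&evals), coeffs);
    /// ```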
    pub fn fft<T: DomainCoeff<F>>(&self, coeffs: &[T]) -> Vec<T> {
        let mut coeffs = coeffs.to_vec();
        self.fft_in_place(&mut coeffs);
        coeffs
    }

    /// In-place variant of [`Self::fft`]; resizes `coeffs` to the domain size.
    pub fn fft_in_place<T: DomainCoeff<F>>(&self, coeffs: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            coeffs.resize(self.size(), T::zero());
            self.in_order_fft_in_place(&mut *coeffs);
        });
    }

    /// Interpolates the polynomial (in coefficient form) whose evaluations over the
    /// domain are `evals`.
    pub fn ifft<T: DomainCoeff<F>>(&self, evals: &[T]) -> Vec<T> {
        let mut evals = evals.to_vec();
        self.ifft_in_place(&mut evals);
        evals
    }

    /// In-place variant of [`Self::ifft`]; resizes `evals` to the domain size.
    #[inline]
    pub fn ifft_in_place<T: DomainCoeff<F>>(&self, evals: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            evals.resize(self.size(), T::zero());
            self.in_order_ifft_in_place(&mut *evals);
        });
    }

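    /// Evaluates `coeffs` over the coset `g * H` of the domain `H`, where `g` is
    /// `F::multiplicative_generator()`. This is the inverse of [`Self::coset_ifft`].
    ///
    /// A rough round-trip sketch (assuming `bls12_377::Fr`, as in the tests below;
    /// not part of the original source):
    ///
    /// ```ignore
    /// let domain = EvaluationDomain::<Fr>::new(4).unwrap();
    /// let coeffs = vec![Fr::one(); 4];
    /// let coset_evals = domain.coset_fft(&coeffs);
    /// assert_eq!(domain.coset_ifft(&coset_evals), coeffs);
    /// ```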
    pub fn coset_fft<T: DomainCoeff<F>>(&self, coeffs: &[T]) -> Vec<T> {
        let mut coeffs = coeffs.to_vec();
        self.coset_fft_in_place(&mut coeffs);
        coeffs
    }

    /// In-place variant of [`Self::coset_fft`].
    pub fn coset_fft_in_place<T: DomainCoeff<F>>(&self, coeffs: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            Self::distribute_powers(coeffs, F::multiplicative_generator());
            self.fft_in_place(coeffs);
        });
    }

    /// Interpolates the polynomial whose evaluations over the coset `g * H` are `evals`.
    pub fn coset_ifft<T: DomainCoeff<F>>(&self, evals: &[T]) -> Vec<T> {
        let mut evals = evals.to_vec();
        self.coset_ifft_in_place(&mut evals);
        evals
    }

    /// In-place variant of [`Self::coset_ifft`]; resizes `evals` to the domain size.
    pub fn coset_ifft_in_place<T: DomainCoeff<F>>(&self, evals: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            evals.resize(self.size(), T::zero());
            self.in_order_coset_ifft_in_place(&mut *evals);
        });
    }

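    /// Multiplies the `i`-th element of `coeffs` by `g^i`, i.e. it maps the
    /// coefficient vector of `p(x)` to that of `p(g * x)`:
    /// `[c_0, c_1, c_2, ...]` becomes `[c_0, c_1 * g, c_2 * g^2, ...]`.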
    fn distribute_powers<T: DomainCoeff<F>>(coeffs: &mut [T], g: F) {
        Self::distribute_powers_and_mul_by_const(coeffs, g, F::one());
    }

    #[cfg(feature = "serial")]
    fn distribute_powers_and_mul_by_const<T: DomainCoeff<F>>(coeffs: &mut [T], g: F, c: F) {
        let mut pow = c;
        coeffs.iter_mut().for_each(|coeff| {
            *coeff *= pow;
            pow *= &g
        })
    }

    #[cfg(not(feature = "serial"))]
    fn distribute_powers_and_mul_by_const<T: DomainCoeff<F>>(coeffs: &mut [T], g: F, c: F) {
        let min_parallel_chunk_size = 1024;
        let num_cpus_available = max_available_threads();
        let num_elem_per_thread = core::cmp::max(coeffs.len() / num_cpus_available, min_parallel_chunk_size);

        cfg_chunks_mut!(coeffs, num_elem_per_thread).enumerate().for_each(|(i, chunk)| {
            let offset = c * g.pow([(i * num_elem_per_thread) as u64]);
            let mut pow = offset;
            chunk.iter_mut().for_each(|coeff| {
                *coeff *= pow;
                pow *= &g
            })
        });
    }

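    /// Evaluates all Lagrange basis polynomials of the domain at the point `tau`,
    /// i.e. returns `[L_0(tau), ..., L_{n-1}(tau)]`, where `L_i(omega^j)` is `1` if
    /// `i == j` and `0` otherwise.
    ///
    /// A rough interpolation sketch (mirroring the Lagrange-coefficient tests below;
    /// not part of the original source):
    ///
    /// ```ignore
    /// let evals = domain.fft(poly.coeffs());
    /// let lagrange = domain.evaluate_all_lagrange_coefficients(tau);
    /// let interpolated =
    ///     lagrange.iter().zip(&evals).fold(Fr::zero(), |acc, (l, e)| acc + *l * e);
    /// assert_eq!(interpolated, poly.evaluate(tau));
    /// ```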
    pub fn evaluate_all_lagrange_coefficients(&self, tau: F) -> Vec<F> {
        let size = self.size as usize;
        let t_size = tau.pow([self.size]);
        let one = F::one();
        if t_size.is_one() {
            // `tau` lies in the domain, so exactly one coefficient is one and the
            // rest are zero.
            let mut u = vec![F::zero(); size];
            let mut omega_i = one;
            for x in u.iter_mut().take(size) {
                if omega_i == tau {
                    *x = one;
                    break;
                }
                omega_i *= &self.group_gen;
            }
            u
        } else {
            // `L_i(tau) = Z_H(tau) * omega^i / (n * (tau - omega^i))`, computed with a
            // single batch inversion of the `tau - omega^i` terms.
            let mut l = (t_size - one) * self.size_inv;
            let mut r = one;
            let mut u = vec![F::zero(); size];
            let mut ls = vec![F::zero(); size];
            for i in 0..size {
                u[i] = tau - r;
                ls[i] = l;
                l *= &self.group_gen;
                r *= &self.group_gen;
            }

            batch_inversion(u.as_mut_slice());
            cfg_iter_mut!(u).zip_eq(ls).for_each(|(tau_minus_r, l)| {
                *tau_minus_r = l * *tau_minus_r;
            });
            u
        }
    }

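    /// Returns the vanishing polynomial of the domain, `Z_H(x) = x^n - 1`, which is
    /// zero exactly on the `n` domain elements.
    ///
    /// A small sanity sketch (mirroring `vanishing_polynomial_vanishes_on_domain` below):
    ///
    /// ```ignore
    /// assert!(domain.elements().all(|x| domain.evaluate_vanishing_polynomial(x).is_zero()));
    /// ```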
    pub fn vanishing_polynomial(&self) -> SparsePolynomial<F> {
        let coeffs = [(0, -F::one()), (self.size(), F::one())];
        SparsePolynomial::from_coefficients(coeffs)
    }

    /// Evaluates the vanishing polynomial at `tau`, i.e. computes `tau^n - 1`.
    pub fn evaluate_vanishing_polynomial(&self, tau: F) -> F {
        tau.pow([self.size]) - F::one()
    }

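    /// Returns an iterator over the domain elements `1, g, g^2, ..., g^{n-1}`.
    ///
    /// A rough sketch (assuming `bls12_377::Fr`; not part of the original source):
    ///
    /// ```ignore
    /// let elements: Vec<Fr> = domain.elements().collect();
    /// assert_eq!(elements.len(), domain.size());
    /// ```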
    pub fn elements(&self) -> Elements<F> {
        Elements { cur_elem: F::one(), cur_pow: 0, domain: *self }
    }

    /// Given evaluations over the coset `g * H`, divides each one in place by the
    /// value of the vanishing polynomial on that coset, `Z_H(g) = g^n - 1`
    /// (constant over the coset).
    pub fn divide_by_vanishing_poly_on_coset_in_place(&self, evals: &mut [F]) {
        let i = self.evaluate_vanishing_polynomial(F::multiplicative_generator()).inverse().unwrap();

        cfg_iter_mut!(evals).for_each(|eval| *eval *= &i);
    }

    /// Maps `index`, which treats the first `other.size()` positions as the elements
    /// of the subdomain `other`, to the index of the corresponding element of `self`.
    pub fn reindex_by_subdomain(&self, other: &Self, index: usize) -> Result<usize> {
        ensure!(self.size() > other.size(), "other.size() must be smaller than self.size()");

        let period = self.size() / other.size();
        if index < other.size() {
            // The `i`-th element of `other` is the `i * period`-th element of `self`.
            Ok(index * period)
        } else {
            // The remaining indices fill the slots of `self` not hit by the
            // subdomain, preserving their relative order.
            let i = index - other.size();
            let x = period - 1;
            Ok(i + (i / x) + 1)
        }
    }

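    /// Multiplies two polynomials given in evaluation form over this domain by
    /// pointwise multiplication of their evaluations. Errors if the two slices
    /// have different lengths.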
    pub fn mul_polynomials_in_evaluation_domain(&self, self_evals: Vec<F>, other_evals: &[F]) -> Result<Vec<F>> {
        let mut result = self_evals;

        ensure!(result.len() == other_evals.len());
        cfg_iter_mut!(result).zip_eq(other_evals).for_each(|(a, b)| *a *= b);

        Ok(result)
    }
}

impl<F: FftField> EvaluationDomain<F> {
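    /// Precomputes the table of roots of unity used by FFTs over this domain, so that
    /// repeated FFTs can reuse it.
    ///
    /// A rough usage sketch (mirroring `test_fft_precomputation` below; not part of
    /// the original source):
    ///
    /// ```ignore
    /// let pc = domain.precompute_fft();
    /// let evals = domain.in_order_fft_with_pc(&coeffs, &pc);
    /// assert_eq!(evals, domain.fft(&coeffs));
    /// ```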
    pub fn precompute_fft(&self) -> FFTPrecomputation<F> {
        execute_with_max_available_threads(|| FFTPrecomputation {
            roots: self.roots_of_unity(self.group_gen),
            domain: *self,
        })
    }

    /// Precomputes the table of inverse roots of unity used by iFFTs over this domain.
    pub fn precompute_ifft(&self) -> IFFTPrecomputation<F> {
        execute_with_max_available_threads(|| IFFTPrecomputation {
            inverse_roots: self.roots_of_unity(self.group_gen_inv),
            domain: *self,
        })
    }

    pub(crate) fn in_order_fft_in_place<T: DomainCoeff<F>>(&self, x_s: &mut [T]) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            // Try the CUDA NTT first for sufficiently large domains of 32-byte
            // elements; fall back to the CPU path if it fails.
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        let pc = self.precompute_fft();
        self.fft_helper_in_place_with_pc(x_s, FFTOrder::II, &pc)
    }

    pub fn in_order_fft_with_pc<T: DomainCoeff<F>>(&self, x_s: &[T], pc: &FFTPrecomputation<F>) -> Vec<T> {
        let mut x_s = x_s.to_vec();
        if self.size() != x_s.len() {
            x_s.extend(core::iter::repeat(T::zero()).take(self.size() - x_s.len()));
        }
        self.fft_helper_in_place_with_pc(&mut x_s, FFTOrder::II, pc);
        x_s
    }

    pub(crate) fn in_order_ifft_in_place<T: DomainCoeff<F>>(&self, x_s: &mut [T]) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        let pc = self.precompute_ifft();
        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, &pc);
        cfg_iter_mut!(x_s).for_each(|val| *val *= self.size_inv);
    }

    pub(crate) fn in_order_coset_ifft_in_place<T: DomainCoeff<F>>(&self, x_s: &mut [T]) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Coset,
            );
            if result.is_ok() {
                return;
            }
        }

        let pc = self.precompute_ifft();
        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, &pc);
        let coset_shift = self.generator_inv;
        Self::distribute_powers_and_mul_by_const(x_s, coset_shift, self.size_inv);
    }

    #[allow(unused)]
    pub(crate) fn in_order_fft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &FFTPrecomputation<F>,
    ) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        self.fft_helper_in_place_with_pc(x_s, FFTOrder::II, pre_comp)
    }

    pub(crate) fn out_order_fft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &FFTPrecomputation<F>,
    ) {
        self.fft_helper_in_place_with_pc(x_s, FFTOrder::IO, pre_comp)
    }

    pub(crate) fn in_order_ifft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, pre_comp);
        cfg_iter_mut!(x_s).for_each(|val| *val *= self.size_inv);
    }

    pub(crate) fn out_order_ifft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::OI, pre_comp);
        cfg_iter_mut!(x_s).for_each(|val| *val *= self.size_inv);
    }

    #[allow(unused)]
    pub(crate) fn in_order_coset_ifft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Coset,
            );
            if result.is_ok() {
                return;
            }
        }

        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, pre_comp);
        let coset_shift = self.generator_inv;
        Self::distribute_powers_and_mul_by_const(x_s, coset_shift, self.size_inv);
    }

    fn fft_helper_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        ord: FFTOrder,
        pre_comp: &FFTPrecomputation<F>,
    ) {
        use FFTOrder::*;
        let pc = pre_comp.precomputation_for_subdomain(self).unwrap();

        let log_len = log2(x_s.len());

        if ord == OI {
            self.oi_helper_with_roots(x_s, &pc.roots);
        } else {
            self.io_helper_with_roots(x_s, &pc.roots);
        }

        if ord == II {
            // The IO pass leaves the output in bit-reversed order; undo it.
            derange_helper(x_s, log_len);
        }
    }

    fn ifft_helper_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        ord: FFTOrder,
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        use FFTOrder::*;
        let pc = pre_comp.precomputation_for_subdomain(self).unwrap();

        let log_len = log2(x_s.len());

        if ord == II {
            // Bit-reverse the in-order input; the OI pass below then yields in-order output.
            derange_helper(x_s, log_len);
        }

        if ord == IO {
            self.io_helper_with_roots(x_s, &pc.inverse_roots);
        } else {
            self.oi_helper_with_roots(x_s, &pc.inverse_roots);
        }
    }

    /// Computes the first `self.size / 2` powers of `root`, i.e. the twiddle factors
    /// used by the radix-2 butterflies.
    #[cfg(feature = "serial")]
    pub fn roots_of_unity(&self, root: F) -> Vec<F> {
        compute_powers_serial((self.size as usize) / 2, root)
    }

    /// Computes the first `self.size / 2` powers of `root`, i.e. the twiddle factors
    /// used by the radix-2 butterflies. Falls back to the serial computation for
    /// small domains.
    #[cfg(not(feature = "serial"))]
    pub fn roots_of_unity(&self, root: F) -> Vec<F> {
        let log_size = log2(self.size as usize);
        if log_size <= LOG_ROOTS_OF_UNITY_PARALLEL_SIZE {
            compute_powers_serial((self.size as usize) / 2, root)
        } else {
            // Compute root^(2^i) for i in 0..(log_size - 1), then expand them recursively.
            let mut temp = root;
            let log_powers: Vec<F> = (0..(log_size - 1))
                .map(|_| {
                    let old_value = temp;
                    temp.square_in_place();
                    old_value
                })
                .collect();

            let mut powers = vec![F::zero(); 1 << (log_size - 1)];
            Self::roots_of_unity_recursive(&mut powers, &log_powers);
            powers
        }
    }

    #[cfg(not(feature = "serial"))]
    fn roots_of_unity_recursive(out: &mut [F], log_powers: &[F]) {
        assert_eq!(out.len(), 1 << log_powers.len());
        // Base case: fill in the powers of `log_powers[0]` serially.
        if log_powers.len() <= LOG_ROOTS_OF_UNITY_PARALLEL_SIZE as usize {
            out[0] = F::one();
            for idx in 1..out.len() {
                out[idx] = out[idx - 1] * log_powers[0];
            }
            return;
        }

        // Recursively compute the "low" and "high" halves of the powers in parallel,
        // then combine them with an outer product.
        let (lr_lo, lr_hi) = log_powers.split_at((1 + log_powers.len()) / 2);
        let mut scr_lo = vec![F::default(); 1 << lr_lo.len()];
        let mut scr_hi = vec![F::default(); 1 << lr_hi.len()];
        rayon::join(
            || Self::roots_of_unity_recursive(&mut scr_lo, lr_lo),
            || Self::roots_of_unity_recursive(&mut scr_hi, lr_hi),
        );
        out.par_chunks_mut(scr_lo.len()).zip(&scr_hi).for_each(|(out_chunk, scr_hi)| {
            for (out_elem, scr_lo) in out_chunk.iter_mut().zip(&scr_lo) {
                *out_elem = *scr_hi * scr_lo;
            }
        });
    }

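    /// Radix-2 butterfly for a pass whose input is in order and whose output is
    /// bit-reversed (decimation in frequency): `(lo, hi) <- (lo + hi, (lo - hi) * root)`.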
    #[inline(always)]
    fn butterfly_fn_io<T: DomainCoeff<F>>(((lo, hi), root): ((&mut T, &mut T), &F)) {
        let neg = *lo - *hi;
        *lo += *hi;
        *hi = neg;
        *hi *= *root;
    }

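    /// Radix-2 butterfly for a pass whose input is bit-reversed and whose output is
    /// in order (decimation in time): `(lo, hi) <- (lo + hi * root, lo - hi * root)`.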
    #[inline(always)]
    fn butterfly_fn_oi<T: DomainCoeff<F>>(((lo, hi), root): ((&mut T, &mut T), &F)) {
        *hi *= *root;
        let neg = *lo - *hi;
        *lo += *hi;
        *hi = neg;
    }

    #[allow(clippy::too_many_arguments)]
    fn apply_butterfly<T: DomainCoeff<F>, G: Fn(((&mut T, &mut T), &F)) + Copy + Sync + Send>(
        g: G,
        xi: &mut [T],
        roots: &[F],
        step: usize,
        chunk_size: usize,
        num_chunks: usize,
        max_threads: usize,
        gap: usize,
    ) {
        cfg_chunks_mut!(xi, chunk_size).for_each(|cxi| {
            let (lo, hi) = cxi.split_at_mut(gap);
            // Only parallelize within a chunk when the halves are large and there are
            // fewer chunks than threads.
            if gap > MIN_GAP_SIZE_FOR_PARALLELISATION && num_chunks < max_threads {
                cfg_iter_mut!(lo).zip(hi).zip(cfg_iter!(roots).step_by(step)).for_each(g);
            } else {
                lo.iter_mut().zip(hi).zip(roots.iter().step_by(step)).for_each(g);
            }
        });
    }

    #[allow(clippy::unnecessary_to_owned)]
    fn io_helper_with_roots<T: DomainCoeff<F>>(&self, xi: &mut [T], roots: &[F]) {
        let mut roots = std::borrow::Cow::Borrowed(roots);

        let mut step = 1;
        let mut first = true;

        #[cfg(not(feature = "serial"))]
        let max_threads = snarkvm_utilities::parallel::max_available_threads();
        #[cfg(feature = "serial")]
        let max_threads = 1;

        let mut gap = xi.len() / 2;
        while gap > 0 {
            let chunk_size = 2 * gap;
            let num_chunks = xi.len() / chunk_size;

            // Once there are many chunks, compact the roots actually used into a
            // contiguous buffer; otherwise stride through the full table.
            if num_chunks >= MIN_NUM_CHUNKS_FOR_COMPACTION {
                if !first {
                    roots = Cow::Owned(cfg_into_iter!(roots.into_owned()).step_by(step * 2).collect());
                }
                step = 1;
                roots.to_mut().shrink_to_fit();
            } else {
                step = num_chunks;
            }
            first = false;

            Self::apply_butterfly(
                Self::butterfly_fn_io,
                xi,
                &roots[..],
                step,
                chunk_size,
                num_chunks,
                max_threads,
                gap,
            );

            gap /= 2;
        }
    }

    fn oi_helper_with_roots<T: DomainCoeff<F>>(&self, xi: &mut [T], roots_cache: &[F]) {
        // Scratch buffer into which the strided subset of roots in use is gathered
        // each iteration, so the butterflies can read a contiguous slice.
        let compaction_max_size =
            core::cmp::min(roots_cache.len() / 2, roots_cache.len() / MIN_NUM_CHUNKS_FOR_COMPACTION);
        let mut compacted_roots = vec![F::default(); compaction_max_size];

        #[cfg(not(feature = "serial"))]
        let max_threads = snarkvm_utilities::parallel::max_available_threads();
        #[cfg(feature = "serial")]
        let max_threads = 1;

        let mut gap = 1;
        while gap < xi.len() {
            let chunk_size = 2 * gap;
            let num_chunks = xi.len() / chunk_size;

            let (roots, step) = if num_chunks >= MIN_NUM_CHUNKS_FOR_COMPACTION && gap < xi.len() / 2 {
                cfg_iter_mut!(compacted_roots[..gap])
                    .zip(cfg_iter!(roots_cache[..(gap * num_chunks)]).step_by(num_chunks))
                    .for_each(|(a, b)| *a = *b);
                (&compacted_roots[..gap], 1)
            } else {
                (roots_cache, num_chunks)
            };

            Self::apply_butterfly(Self::butterfly_fn_oi, xi, roots, step, chunk_size, num_chunks, max_threads, gap);

            gap *= 2;
        }
    }
}


/// The minimum number of chunks at which compacting the roots table is worthwhile.
const MIN_NUM_CHUNKS_FOR_COMPACTION: usize = 1 << 7;

/// The minimum half-chunk size at which the inner butterfly loop is parallelized.
const MIN_GAP_SIZE_FOR_PARALLELISATION: usize = 1 << 10;

/// Domains whose log-size is at most this value compute their roots of unity serially.
#[cfg(not(feature = "serial"))]
const LOG_ROOTS_OF_UNITY_PARALLEL_SIZE: u32 = 7;

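/// Reverses the low `log_len` bits of `a`; e.g. `bitrev(0b001, 3) == 0b100`.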
#[inline]
pub(super) fn bitrev(a: u64, log_len: u32) -> u64 {
    a.reverse_bits() >> (64 - log_len)
}

/// Permutes `xi` into bit-reversed order.
pub(crate) fn derange<T>(xi: &mut [T]) {
    derange_helper(xi, log2(xi.len()))
}

fn derange_helper<T>(xi: &mut [T], log_len: u32) {
    for idx in 1..(xi.len() as u64 - 1) {
        let ridx = bitrev(idx, log_len);
        if idx < ridx {
            xi.swap(idx as usize, ridx as usize);
        }
    }
}

#[derive(PartialEq, Eq, Debug)]
enum FFTOrder {
    /// Both the input and the output of the FFT must be in order.
    II,
    /// The input of the FFT must be in order, but the output does not have to be.
    IO,
    /// The input of the FFT does not have to be in order, but the output must be.
    OI,
}

pub(crate) fn compute_powers_serial<F: Field>(size: usize, root: F) -> Vec<F> {
    compute_powers_and_mul_by_const_serial(size, root, F::one())
}

pub(crate) fn compute_powers_and_mul_by_const_serial<F: Field>(size: usize, root: F, c: F) -> Vec<F> {
    let mut value = c;
    (0..size)
        .map(|_| {
            let old_value = value;
            value *= root;
            old_value
        })
        .collect()
}

#[allow(unused)]
#[cfg(not(feature = "serial"))]
pub(crate) fn compute_powers<F: Field>(size: usize, g: F) -> Vec<F> {
    if size < MIN_PARALLEL_CHUNK_SIZE {
        return compute_powers_serial(size, g);
    }
    let num_cpus_available = max_available_threads();
    let num_elem_per_thread = core::cmp::max(size / num_cpus_available, MIN_PARALLEL_CHUNK_SIZE);
    let num_cpus_used = size / num_elem_per_thread;

    let res: Vec<F> = (0..num_cpus_used)
        .into_par_iter()
        .flat_map(|i| {
            let offset = g.pow([(i * num_elem_per_thread) as u64]);
            let num_elements_to_compute = core::cmp::min(size - i * num_elem_per_thread, num_elem_per_thread);
            compute_powers_and_mul_by_const_serial(num_elements_to_compute, g, offset)
        })
        .collect();
    res
}

/// An iterator over the elements of an [`EvaluationDomain`], in order:
/// `1, g, g^2, ..., g^{n-1}`.
#[derive(Clone)]
pub struct Elements<F: FftField> {
    cur_elem: F,
    cur_pow: u64,
    domain: EvaluationDomain<F>,
}

impl<F: FftField> Iterator for Elements<F> {
    type Item = F;

    fn next(&mut self) -> Option<F> {
        if self.cur_pow == self.domain.size {
            None
        } else {
            let cur_elem = self.cur_elem;
            self.cur_elem *= &self.domain.group_gen;
            self.cur_pow += 1;
            Some(cur_elem)
        }
    }
}

/// Precomputed roots of unity for FFTs over a fixed domain
/// (see [`EvaluationDomain::precompute_fft`]).
#[derive(Clone, Eq, PartialEq, Debug, CanonicalDeserialize, CanonicalSerialize)]
pub struct FFTPrecomputation<F: FftField> {
    roots: Vec<F>,
    domain: EvaluationDomain<F>,
}

impl<F: FftField> FFTPrecomputation<F> {
    /// Inverts the precomputed roots to obtain the corresponding iFFT precomputation.
    pub fn to_ifft_precomputation(&self) -> IFFTPrecomputation<F> {
        let mut inverse_roots = self.roots.clone();
        snarkvm_fields::batch_inversion(&mut inverse_roots);
        IFFTPrecomputation { inverse_roots, domain: self.domain }
    }

    /// Returns this precomputation restricted to `domain`, provided `domain` is this
    /// precomputation's domain or a subdomain of it; returns `None` otherwise.
    pub fn precomputation_for_subdomain<'a>(&'a self, domain: &EvaluationDomain<F>) -> Option<Cow<'a, Self>> {
        if domain.size() == 1 {
            return Some(Cow::Owned(Self { roots: vec![], domain: *domain }));
        }
        if &self.domain == domain {
            Some(Cow::Borrowed(self))
        } else if domain.size() < self.domain.size() {
            let size_ratio = self.domain.size() / domain.size();
            let roots = self.roots.iter().step_by(size_ratio).copied().collect();
            Some(Cow::Owned(Self { roots, domain: *domain }))
        } else {
            None
        }
    }
}

/// Precomputed inverse roots of unity for iFFTs over a fixed domain
/// (see [`EvaluationDomain::precompute_ifft`]).
#[derive(Clone, Eq, PartialEq, Debug, CanonicalSerialize, CanonicalDeserialize)]
pub struct IFFTPrecomputation<F: FftField> {
    inverse_roots: Vec<F>,
    domain: EvaluationDomain<F>,
}

impl<F: FftField> IFFTPrecomputation<F> {
    /// Returns this precomputation restricted to `domain`, provided `domain` is this
    /// precomputation's domain or a subdomain of it; returns `None` otherwise.
    pub fn precomputation_for_subdomain<'a>(&'a self, domain: &EvaluationDomain<F>) -> Option<Cow<'a, Self>> {
        if domain.size() == 1 {
            return Some(Cow::Owned(Self { inverse_roots: vec![], domain: *domain }));
        }
        if &self.domain == domain {
            Some(Cow::Borrowed(self))
        } else if domain.size() < self.domain.size() {
            let size_ratio = self.domain.size() / domain.size();
            let inverse_roots = self.inverse_roots.iter().step_by(size_ratio).copied().collect();
            Some(Cow::Owned(Self { inverse_roots, domain: *domain }))
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
    use crate::fft::domain::FFTOrder;
    use crate::fft::{DensePolynomial, EvaluationDomain};
    use rand::Rng;
    use snarkvm_curves::bls12_377::Fr;
    use snarkvm_fields::{FftField, Field, One, Zero};
    use snarkvm_utilities::{TestRng, Uniform};

    #[test]
    fn vanishing_polynomial_evaluation() {
        let rng = &mut TestRng::default();
        for coeffs in 0..10 {
            let domain = EvaluationDomain::<Fr>::new(coeffs).unwrap();
            let z = domain.vanishing_polynomial();
            for _ in 0..100 {
                let point = rng.gen();
                assert_eq!(z.evaluate(point), domain.evaluate_vanishing_polynomial(point))
            }
        }
    }

    #[test]
    fn vanishing_polynomial_vanishes_on_domain() {
        for coeffs in 0..1000 {
            let domain = EvaluationDomain::<Fr>::new(coeffs).unwrap();
            let z = domain.vanishing_polynomial();
            for point in domain.elements() {
                assert!(z.evaluate(point).is_zero())
            }
        }
    }

    #[test]
    fn size_of_elements() {
        for coeffs in 1..10 {
            let size = 1 << coeffs;
            let domain = EvaluationDomain::<Fr>::new(size).unwrap();
            let domain_size = domain.size();
            assert_eq!(domain_size, domain.elements().count());
        }
    }

    #[test]
    fn elements_contents() {
        for coeffs in 1..10 {
            let size = 1 << coeffs;
            let domain = EvaluationDomain::<Fr>::new(size).unwrap();
            for (i, element) in domain.elements().enumerate() {
                assert_eq!(element, domain.group_gen.pow([i as u64]));
            }
        }
    }

    #[test]
    fn non_systematic_lagrange_coefficients_test() {
        let mut rng = TestRng::default();
        for domain_dimension in 1..10 {
            let domain_size = 1 << domain_dimension;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let random_point = Fr::rand(&mut rng);
            let lagrange_coefficients = domain.evaluate_all_lagrange_coefficients(random_point);

            let random_polynomial = DensePolynomial::<Fr>::rand(domain_size - 1, &mut rng);
            let polynomial_evaluations = domain.fft(random_polynomial.coeffs());
            let actual_evaluation = random_polynomial.evaluate(random_point);

            let mut interpolated_evaluation = Fr::zero();
            for i in 0..domain_size {
                interpolated_evaluation += lagrange_coefficients[i] * polynomial_evaluations[i];
            }
            assert_eq!(actual_evaluation, interpolated_evaluation);
        }
    }

    #[test]
    fn systematic_lagrange_coefficients_test() {
        for domain_dimension in 1..5 {
            let domain_size = 1 << domain_dimension;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let all_domain_elements: Vec<Fr> = domain.elements().collect();
            for (i, domain_element) in all_domain_elements.iter().enumerate().take(domain_size) {
                let lagrange_coefficients = domain.evaluate_all_lagrange_coefficients(*domain_element);
                for (j, lagrange_coefficient) in lagrange_coefficients.iter().enumerate().take(domain_size) {
                    if i == j {
                        assert_eq!(*lagrange_coefficient, Fr::one());
                    } else {
                        assert_eq!(*lagrange_coefficient, Fr::zero());
                    }
                }
            }
        }
    }

    #[test]
    fn test_roots_of_unity() {
        let max_degree = 10;
        for log_domain_size in 0..max_degree {
            let domain_size = 1 << log_domain_size;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let actual_roots = domain.roots_of_unity(domain.group_gen);
            for &value in &actual_roots {
                assert!(domain.evaluate_vanishing_polynomial(value).is_zero());
            }
            let expected_roots_elements = domain.elements();
            for (expected, &actual) in expected_roots_elements.zip(&actual_roots) {
                assert_eq!(expected, actual);
            }
            assert_eq!(actual_roots.len(), domain_size / 2);
        }
    }

    #[test]
    fn test_fft_correctness() {
        let mut rng = TestRng::default();

        let log_degree = 5;
        let degree = 1 << log_degree;
        let random_polynomial = DensePolynomial::<Fr>::rand(degree - 1, &mut rng);

        for log_domain_size in log_degree..(log_degree + 2) {
            let domain_size = 1 << log_domain_size;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let polynomial_evaluations = domain.fft(&random_polynomial.coeffs);
            let polynomial_coset_evaluations = domain.coset_fft(&random_polynomial.coeffs);
            for (i, x) in domain.elements().enumerate() {
                let coset_x = Fr::multiplicative_generator() * x;

                assert_eq!(polynomial_evaluations[i], random_polynomial.evaluate(x));
                assert_eq!(polynomial_coset_evaluations[i], random_polynomial.evaluate(coset_x));
            }

            let random_polynomial_from_subgroup =
                DensePolynomial::from_coefficients_vec(domain.ifft(&polynomial_evaluations));
            let random_polynomial_from_coset =
                DensePolynomial::from_coefficients_vec(domain.coset_ifft(&polynomial_coset_evaluations));

            assert_eq!(
                random_polynomial, random_polynomial_from_subgroup,
                "degree = {degree}, domain size = {domain_size}"
            );
            assert_eq!(
                random_polynomial, random_polynomial_from_coset,
                "degree = {degree}, domain size = {domain_size}"
            );
        }
    }

    #[test]
    fn test_fft_precomputation() {
        for i in 1..10 {
            let big_domain = EvaluationDomain::<Fr>::new(i).unwrap();
            let pc = big_domain.precompute_fft();
            for j in 1..i {
                let small_domain = EvaluationDomain::<Fr>::new(j).unwrap();
                let small_pc = small_domain.precompute_fft();
                assert_eq!(pc.precomputation_for_subdomain(&small_domain).unwrap().as_ref(), &small_pc);
            }
        }
    }

    #[test]
    fn test_ifft_precomputation() {
        for i in 1..10 {
            let big_domain = EvaluationDomain::<Fr>::new(i).unwrap();
            let pc = big_domain.precompute_ifft();
            for j in 1..i {
                let small_domain = EvaluationDomain::<Fr>::new(j).unwrap();
                let small_pc = small_domain.precompute_ifft();
                assert_eq!(pc.precomputation_for_subdomain(&small_domain).unwrap().as_ref(), &small_pc);
            }
        }
    }

    #[test]
    fn test_ifft_precomputation_from_fft() {
        for i in 1..10 {
            let domain = EvaluationDomain::<Fr>::new(i).unwrap();
            let pc = domain.precompute_ifft();
            let fft_pc = domain.precompute_fft();
            assert_eq!(pc, fft_pc.to_ifft_precomputation())
        }
    }

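    // A small sanity check of `reindex_by_subdomain` (not in the original test suite):
    // with |self| = 8 and |other| = 2, the first two indices map onto the stride-4
    // positions of the subdomain and the remaining indices fill the other slots. The
    // expected values below are hand-derived from the formula in `reindex_by_subdomain`.
    #[test]
    fn test_reindex_by_subdomain() {
        let big = EvaluationDomain::<Fr>::new(8).unwrap();
        let small = EvaluationDomain::<Fr>::new(2).unwrap();
        let mapped: Vec<usize> = (0..8).map(|i| big.reindex_by_subdomain(&small, i).unwrap()).collect();
        assert_eq!(mapped, vec![0, 4, 1, 2, 3, 5, 6, 7]);
    }
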
    #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
    #[test]
    fn test_fft_correctness_cuda() {
        let mut rng = TestRng::default();
        for log_domain in 2..20 {
            println!("Testing domain size {log_domain}");
            let domain_size = 1 << log_domain;
            let random_polynomial = DensePolynomial::<Fr>::rand(domain_size - 1, &mut rng);
            let mut polynomial_evaluations = random_polynomial.coeffs.clone();
            let mut polynomial_evaluations_cuda = random_polynomial.coeffs.clone();

            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let pc = domain.precompute_fft();
            domain.fft_helper_in_place_with_pc(&mut polynomial_evaluations, FFTOrder::II, &pc);

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Standard,
            )
            .is_err()
            {
                println!("cuda error!");
            }

            assert_eq!(polynomial_evaluations, polynomial_evaluations_cuda, "domain size = {domain_size}");

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Standard,
            )
            .is_err()
            {
                println!("cuda error!");
            }
            assert_eq!(random_polynomial.coeffs, polynomial_evaluations_cuda, "domain size = {domain_size}");

            polynomial_evaluations = random_polynomial.coeffs.clone();
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let pc = domain.precompute_fft();
            EvaluationDomain::<Fr>::distribute_powers(&mut polynomial_evaluations, Fr::multiplicative_generator());
            domain.fft_helper_in_place_with_pc(&mut polynomial_evaluations, FFTOrder::II, &pc);

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Coset,
            )
            .is_err()
            {
                println!("cuda error!");
            }

            assert_eq!(polynomial_evaluations, polynomial_evaluations_cuda, "domain size = {domain_size}");

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Coset,
            )
            .is_err()
            {
                println!("cuda error!");
            }
            assert_eq!(random_polynomial.coeffs, polynomial_evaluations_cuda, "domain size = {domain_size}");
        }
    }
}