1use crate::{
29 cfg_chunks_mut,
30 cfg_into_iter,
31 cfg_iter,
32 cfg_iter_mut,
33 fft::{DomainCoeff, SparsePolynomial},
34};
35use snarkvm_fields::{FftField, FftParameters, Field, batch_inversion};
36#[cfg(not(feature = "serial"))]
37use snarkvm_utilities::max_available_threads;
38use snarkvm_utilities::{execute_with_max_available_threads, serialize::*};
39
40use rand::Rng;
41use std::{borrow::Cow, fmt};
42
43use anyhow::{Result, ensure};
44
45#[cfg(feature = "serial")]
46use itertools::Itertools;
47#[cfg(not(feature = "serial"))]
48use rayon::prelude::*;
49
/// Returns `ceil(log2(x))` for nonzero `x`, and `0` when `x == 0`.
///
/// For a power of two this is the exact base-2 logarithm; for any other
/// input the result is rounded up to the next integer.
pub fn log2(x: usize) -> u32 {
    match x {
        0 => 0,
        // Exact: a power of two has its single set bit at position `trailing_zeros`.
        _ if x.is_power_of_two() => x.trailing_zeros(),
        // Round up: `usize::BITS - leading_zeros` is the bit length of `x`,
        // which equals `floor(log2(x)) + 1 == ceil(log2(x))` for non-powers of two.
        _ => usize::BITS - x.leading_zeros(),
    }
}
72
#[allow(unused)]
#[cfg(not(feature = "serial"))]
/// Below this many elements, `compute_powers` does not bother spawning
/// parallel work and falls back to the serial implementation.
const MIN_PARALLEL_CHUNK_SIZE: usize = 1 << 7;
77
/// A multiplicative subgroup of a field `F`, of power-of-two order, over
/// whose elements polynomials can be evaluated and interpolated via
/// radix-2 (i)FFTs.
#[derive(Copy, Clone, Hash, Eq, PartialEq, CanonicalSerialize, CanonicalDeserialize)]
pub struct EvaluationDomain<F: FftField> {
    /// The size of the domain (always a power of two).
    pub size: u64,
    /// `log2(self.size)`.
    pub log_size_of_group: u32,
    /// The size of the domain as a field element.
    pub size_as_field_element: F,
    /// The inverse of the size in the field (used to normalize iFFT outputs).
    pub size_inv: F,
    /// A generator of the subgroup, i.e. a primitive `size`-th root of unity.
    pub group_gen: F,
    /// The inverse of `group_gen` (used for inverse FFTs).
    pub group_gen_inv: F,
    /// The inverse of the field's multiplicative generator (used for coset iFFTs).
    pub generator_inv: F,
}
98
99impl<F: FftField> fmt::Debug for EvaluationDomain<F> {
100 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
101 write!(f, "Multiplicative subgroup of size {}", self.size)
102 }
103}
104
impl<F: FftField> EvaluationDomain<F> {
    /// Samples a random field element guaranteed to lie *outside* the domain,
    /// by rejection sampling until the vanishing polynomial is nonzero at it.
    pub fn sample_element_outside_domain<R: Rng>(&self, rng: &mut R) -> F {
        let mut t = F::rand(rng);
        while self.evaluate_vanishing_polynomial(t).is_zero() {
            t = F::rand(rng);
        }
        t
    }

    /// Constructs a domain large enough for polynomials with `num_coeffs`
    /// coefficients; the size is rounded up to the next power of two.
    ///
    /// Returns `None` if the rounded size exceeds the field's two-adicity,
    /// i.e. `F` has no multiplicative subgroup of that order.
    pub fn new(num_coeffs: usize) -> Option<Self> {
        // Compute the domain size as the next power of two.
        let size = num_coeffs.checked_next_power_of_two()? as u64;
        let log_size_of_group = size.trailing_zeros();

        if log_size_of_group > F::FftParameters::TWO_ADICITY {
            return None;
        }

        // The generator of the subgroup: a primitive `size`-th root of unity.
        let group_gen = F::get_root_of_unity(size as usize)?;

        // Sanity check: its order must divide `size`.
        debug_assert_eq!(group_gen.pow([size]), F::one());

        let size_as_field_element = F::from(size);
        let size_inv = size_as_field_element.inverse()?;

        Some(EvaluationDomain {
            size,
            log_size_of_group,
            size_as_field_element,
            size_inv,
            group_gen,
            group_gen_inv: group_gen.inverse()?,
            generator_inv: F::multiplicative_generator().inverse()?,
        })
    }

    /// Returns the size the domain for `num_coeffs` coefficients would have,
    /// or `None` if such a domain is unsupported by the field.
    pub fn compute_size_of_domain(num_coeffs: usize) -> Option<usize> {
        let size = num_coeffs.checked_next_power_of_two()?;
        if size.trailing_zeros() <= F::FftParameters::TWO_ADICITY { Some(size) } else { None }
    }

    /// Returns the size of the domain as a `usize`.
    pub fn size(&self) -> usize {
        self.size as usize
    }

    /// Computes the FFT of `coeffs`, returning the evaluations over the domain.
    pub fn fft<T: DomainCoeff<F>>(&self, coeffs: &[T]) -> Vec<T> {
        let mut coeffs = coeffs.to_vec();
        self.fft_in_place(&mut coeffs);
        coeffs
    }

    /// Computes the FFT of `coeffs` in place, zero-padding up to the domain size.
    pub fn fft_in_place<T: DomainCoeff<F>>(&self, coeffs: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            coeffs.resize(self.size(), T::zero());
            self.in_order_fft_in_place(&mut *coeffs);
        });
    }

    /// Computes the inverse FFT of `evals`, returning the coefficients.
    pub fn ifft<T: DomainCoeff<F>>(&self, evals: &[T]) -> Vec<T> {
        let mut evals = evals.to_vec();
        self.ifft_in_place(&mut evals);
        evals
    }

    /// Computes the inverse FFT of `evals` in place, zero-padding up to the domain size.
    #[inline]
    pub fn ifft_in_place<T: DomainCoeff<F>>(&self, evals: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            evals.resize(self.size(), T::zero());
            self.in_order_ifft_in_place(&mut *evals);
        });
    }

    /// Evaluates `coeffs` over the coset `g * H` of this domain `H`, where
    /// `g` is the field's multiplicative generator.
    pub fn coset_fft<T: DomainCoeff<F>>(&self, coeffs: &[T]) -> Vec<T> {
        let mut coeffs = coeffs.to_vec();
        self.coset_fft_in_place(&mut coeffs);
        coeffs
    }

    /// In-place coset FFT: scales coefficient `i` by `g^i`, then runs a plain FFT.
    pub fn coset_fft_in_place<T: DomainCoeff<F>>(&self, coeffs: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            Self::distribute_powers(coeffs, F::multiplicative_generator());
            self.fft_in_place(coeffs);
        });
    }

    /// Interpolates evaluations over the coset `g * H` back into coefficients.
    pub fn coset_ifft<T: DomainCoeff<F>>(&self, evals: &[T]) -> Vec<T> {
        let mut evals = evals.to_vec();
        self.coset_ifft_in_place(&mut evals);
        evals
    }

    /// In-place coset inverse FFT, zero-padding up to the domain size.
    pub fn coset_ifft_in_place<T: DomainCoeff<F>>(&self, evals: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            evals.resize(self.size(), T::zero());
            self.in_order_coset_ifft_in_place(&mut *evals);
        });
    }

    /// Multiplies the `i`-th element of `coeffs` by `g^i`.
    fn distribute_powers<T: DomainCoeff<F>>(coeffs: &mut [T], g: F) {
        Self::distribute_powers_and_mul_by_const(coeffs, g, F::one());
    }

    /// Multiplies the `i`-th element of `coeffs` by `c * g^i` (serial version).
    #[cfg(feature = "serial")]
    fn distribute_powers_and_mul_by_const<T: DomainCoeff<F>>(coeffs: &mut [T], g: F, c: F) {
        // Running power starts at `c * g^0`.
        let mut pow = c;
        coeffs.iter_mut().for_each(|coeff| {
            *coeff *= pow;
            pow *= &g
        })
    }

    /// Multiplies the `i`-th element of `coeffs` by `c * g^i` (parallel version).
    #[cfg(not(feature = "serial"))]
    fn distribute_powers_and_mul_by_const<T: DomainCoeff<F>>(coeffs: &mut [T], g: F, c: F) {
        let min_parallel_chunk_size = 1024;
        let num_cpus_available = max_available_threads();
        let num_elem_per_thread = core::cmp::max(coeffs.len() / num_cpus_available, min_parallel_chunk_size);

        // Each chunk recomputes its own starting power `c * g^(chunk start)`,
        // so the chunks are independent and can run in parallel.
        cfg_chunks_mut!(coeffs, num_elem_per_thread).enumerate().for_each(|(i, chunk)| {
            let offset = c * g.pow([(i * num_elem_per_thread) as u64]);
            let mut pow = offset;
            chunk.iter_mut().for_each(|coeff| {
                *coeff *= pow;
                pow *= &g
            })
        });
    }

    /// Evaluates all Lagrange basis polynomials of the domain at `tau`,
    /// returning `[L_0(tau), ..., L_{size-1}(tau)]`.
    pub fn evaluate_all_lagrange_coefficients(&self, tau: F) -> Vec<F> {
        let size = self.size as usize;
        let t_size = tau.pow([self.size]);
        let one = F::one();
        if t_size.is_one() {
            // `tau^size == 1` means `tau` is itself a domain element, so the
            // result is an indicator vector with a single 1 at tau's position.
            let mut u = vec![F::zero(); size];
            let mut omega_i = one;
            for x in u.iter_mut().take(size) {
                if omega_i == tau {
                    *x = one;
                    break;
                }
                omega_i *= &self.group_gen;
            }
            u
        } else {
            // Closed form: L_i(tau) = (Z(tau) / size) * g^i / (tau - g^i),
            // with all the (tau - g^i) terms inverted in a single batch.
            let mut l = (t_size - one) * self.size_inv;
            let mut r = one;
            let mut u = vec![F::zero(); size];
            let mut ls = vec![F::zero(); size];
            for i in 0..size {
                u[i] = tau - r;
                ls[i] = l;
                l *= &self.group_gen;
                r *= &self.group_gen;
            }

            batch_inversion(u.as_mut_slice());
            cfg_iter_mut!(u, 1_000).zip_eq(ls).for_each(|(tau_minus_r, l)| {
                *tau_minus_r = l * *tau_minus_r;
            });
            u
        }
    }

    /// Returns the vanishing polynomial of the domain, `Z(x) = x^size - 1`.
    pub fn vanishing_polynomial(&self) -> SparsePolynomial<F> {
        let coeffs = [(0, -F::one()), (self.size(), F::one())];
        SparsePolynomial::from_coefficients(coeffs)
    }

    /// Evaluates the vanishing polynomial at `tau`, i.e. `tau^size - 1`.
    pub fn evaluate_vanishing_polynomial(&self, tau: F) -> F {
        tau.pow([self.size]) - F::one()
    }

    /// Returns an iterator over the domain elements `1, g, g^2, ...`.
    pub fn elements(&self) -> Elements<F> {
        Elements { cur_elem: F::one(), cur_pow: 0, domain: *self }
    }

    /// Divides evaluations over the coset `g * H` by the vanishing polynomial
    /// of `H`, in place. On the coset, `Z` takes the same nonzero value at
    /// every point, so this reduces to one scalar multiplication per element.
    pub fn divide_by_vanishing_poly_on_coset_in_place(&self, evals: &mut [F]) {
        let i = self.evaluate_vanishing_polynomial(F::multiplicative_generator()).inverse().unwrap();

        cfg_iter_mut!(evals).for_each(|eval| *eval *= &i);
    }

    /// Maps `index` — an index into the elements of the subdomain `other`
    /// interleaved inside `self` — to the corresponding index in `self`.
    ///
    /// Fails unless `other` is strictly smaller than `self`.
    pub fn reindex_by_subdomain(&self, other: &Self, index: usize) -> Result<usize> {
        ensure!(self.size() > other.size(), "other.size() must be smaller than self.size()");

        // `other`'s elements occur once every `period` elements of `self`.
        let period = self.size() / other.size();
        if index < other.size() {
            // The i-th element of `other` sits at position `i * period` in `self`.
            Ok(index * period)
        } else {
            // Indices beyond `other` address the remaining elements of `self`,
            // skipping over the slots already occupied by `other`'s elements.
            let i = index - other.size();
            let x = period - 1;
            Ok(i + (i / x) + 1)
        }
    }

    /// Pointwise-multiplies two sets of evaluations over this domain.
    ///
    /// Fails if the two inputs have different lengths.
    pub fn mul_polynomials_in_evaluation_domain(&self, self_evals: Vec<F>, other_evals: &[F]) -> Result<Vec<F>> {
        let mut result = self_evals;

        ensure!(result.len() == other_evals.len());
        cfg_iter_mut!(result).zip_eq(other_evals).for_each(|(a, b)| *a *= b);

        Ok(result)
    }
}
361
impl<F: FftField> EvaluationDomain<F> {
    /// Precomputes the root-of-unity table used by the forward FFT.
    pub fn precompute_fft(&self) -> FFTPrecomputation<F> {
        execute_with_max_available_threads(|| FFTPrecomputation {
            roots: self.roots_of_unity(self.group_gen),
            domain: *self,
        })
    }

    /// Precomputes the inverse-root table used by the inverse FFT.
    pub fn precompute_ifft(&self) -> IFFTPrecomputation<F> {
        execute_with_max_available_threads(|| IFFTPrecomputation {
            inverse_roots: self.roots_of_unity(self.group_gen_inv),
            domain: *self,
        })
    }

    /// Forward FFT over `x_s`, input and output both in natural order.
    ///
    /// Tries the CUDA NTT first (when compiled in); falls back to the CPU
    /// path if the CUDA call fails.
    pub(crate) fn in_order_fft_in_place<T: DomainCoeff<F>>(&self, x_s: &mut [T]) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        // NOTE(review): `size_of::<T>() == 32` presumably matches the element
        // size the CUDA kernel supports (a 256-bit field element) — confirm.
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        let pc = self.precompute_fft();
        self.fft_helper_in_place_with_pc(x_s, FFTOrder::II, &pc)
    }

    /// Forward FFT using a caller-supplied precomputation table; the input is
    /// copied and zero-padded up to the domain size.
    pub fn in_order_fft_with_pc<T: DomainCoeff<F>>(&self, x_s: &[T], pc: &FFTPrecomputation<F>) -> Vec<T> {
        let mut x_s = x_s.to_vec();
        if self.size() != x_s.len() {
            x_s.extend(core::iter::repeat_n(T::zero(), self.size() - x_s.len()));
        }
        self.fft_helper_in_place_with_pc(&mut x_s, FFTOrder::II, pc);
        x_s
    }

    /// Inverse FFT over `x_s`, input and output both in natural order.
    /// Tries the CUDA NTT first; falls back to the CPU path on failure.
    pub(crate) fn in_order_ifft_in_place<T: DomainCoeff<F>>(&self, x_s: &mut [T]) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        let pc = self.precompute_ifft();
        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, &pc);
        // Normalize by 1/size (the CPU helper leaves this step to the caller).
        cfg_iter_mut!(x_s, 1_000).for_each(|val| *val *= self.size_inv);
    }

    /// Inverse FFT over evaluations on the coset `g * H`, in natural order.
    /// Tries the CUDA coset NTT first; falls back to the CPU path on failure.
    pub(crate) fn in_order_coset_ifft_in_place<T: DomainCoeff<F>>(&self, x_s: &mut [T]) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Coset,
            );
            if result.is_ok() {
                return;
            }
        }

        let pc = self.precompute_ifft();
        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, &pc);
        // Undo the coset shift (powers of g^-1) and normalize by 1/size in one pass.
        let coset_shift = self.generator_inv;
        Self::distribute_powers_and_mul_by_const(x_s, coset_shift, self.size_inv);
    }

    /// Forward FFT with an explicit precomputation table, natural order in/out.
    #[allow(unused)]
    pub(crate) fn in_order_fft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &FFTPrecomputation<F>,
    ) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        self.fft_helper_in_place_with_pc(x_s, FFTOrder::II, pre_comp)
    }

    /// Forward FFT leaving the output in bit-reversed order (skips the final
    /// derangement pass).
    pub(crate) fn out_order_fft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &FFTPrecomputation<F>,
    ) {
        self.fft_helper_in_place_with_pc(x_s, FFTOrder::IO, pre_comp)
    }

    /// Inverse FFT with an explicit precomputation table, natural order in/out.
    pub(crate) fn in_order_ifft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, pre_comp);
        cfg_iter_mut!(x_s, 1_000).for_each(|val| *val *= self.size_inv);
    }

    /// Inverse FFT whose input is expected in bit-reversed order (skips the
    /// initial derangement pass).
    pub(crate) fn out_order_ifft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::OI, pre_comp);
        cfg_iter_mut!(x_s, 1_000).for_each(|val| *val *= self.size_inv);
    }

    /// Coset inverse FFT with an explicit precomputation table.
    #[allow(unused)]
    pub(crate) fn in_order_coset_ifft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Coset,
            );
            if result.is_ok() {
                return;
            }
        }

        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, pre_comp);
        let coset_shift = self.generator_inv;
        Self::distribute_powers_and_mul_by_const(x_s, coset_shift, self.size_inv);
    }

    /// Dispatches a forward FFT pass according to the requested ordering.
    ///
    /// The core pass is `io_helper` (natural-order input, bit-reversed
    /// output); for `II` a final bit-reversal restores natural order, and for
    /// `OI` the `oi_helper` (bit-reversed input) is used instead.
    fn fft_helper_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        ord: FFTOrder,
        pre_comp: &FFTPrecomputation<F>,
    ) {
        use FFTOrder::*;
        let pc = pre_comp.precomputation_for_subdomain(self).unwrap();

        let log_len = log2(x_s.len());

        if ord == OI {
            self.oi_helper_with_roots(x_s, &pc.roots);
        } else {
            self.io_helper_with_roots(x_s, &pc.roots);
        }

        if ord == II {
            derange_helper(x_s, log_len);
        }
    }

    /// Dispatches an inverse FFT pass according to the requested ordering.
    ///
    /// Note: does NOT divide by `size`; callers multiply by `size_inv`.
    fn ifft_helper_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        ord: FFTOrder,
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        use FFTOrder::*;
        let pc = pre_comp.precomputation_for_subdomain(self).unwrap();

        let log_len = log2(x_s.len());

        // For natural-order input, bit-reverse it first so `oi_helper` can run.
        if ord == II {
            derange_helper(x_s, log_len);
        }

        if ord == IO {
            self.io_helper_with_roots(x_s, &pc.inverse_roots);
        } else {
            self.oi_helper_with_roots(x_s, &pc.inverse_roots);
        }
    }

    /// Computes the first `size / 2` powers of `root` (serial version).
    /// Only half the roots are needed by the butterfly passes.
    #[cfg(feature = "serial")]
    pub fn roots_of_unity(&self, root: F) -> Vec<F> {
        compute_powers_serial((self.size as usize) / 2, root)
    }

    /// Computes the first `size / 2` powers of `root`, in parallel for
    /// sufficiently large domains.
    #[cfg(not(feature = "serial"))]
    pub fn roots_of_unity(&self, root: F) -> Vec<F> {
        let log_size = log2(self.size as usize);
        // Small domains are cheaper to handle serially.
        if log_size <= LOG_ROOTS_OF_UNITY_PARALLEL_SIZE {
            compute_powers_serial((self.size as usize) / 2, root)
        } else {
            // Repeated squarings: log_powers[j] = root^(2^j).
            let mut temp = root;
            let log_powers: Vec<F> = (0..(log_size - 1))
                .map(|_| {
                    let old_value = temp;
                    temp.square_in_place();
                    old_value
                })
                .collect();

            // Allocate the destination and fill it recursively in parallel.
            let mut powers = vec![F::zero(); 1 << (log_size - 1)];
            Self::roots_of_unity_recursive(&mut powers, &log_powers);
            powers
        }
    }

    /// Fills `out[i]` with the product of `log_powers[j]` over the set bits
    /// `j` of `i`, i.e. `out[i] = root^i` given `log_powers[j] = root^(2^j)`.
    /// Splits the bits in half and combines the two sub-tables in parallel.
    #[cfg(not(feature = "serial"))]
    fn roots_of_unity_recursive(out: &mut [F], log_powers: &[F]) {
        assert_eq!(out.len(), 1 << log_powers.len());
        // Base case: small enough to fill with a serial running product.
        if log_powers.len() <= LOG_ROOTS_OF_UNITY_PARALLEL_SIZE as usize {
            out[0] = F::one();
            for idx in 1..out.len() {
                out[idx] = out[idx - 1] * log_powers[0];
            }
            return;
        }

        // Recurse on the low and high halves of the bits concurrently.
        let (lr_lo, lr_hi) = log_powers.split_at(log_powers.len().div_ceil(2));
        let mut scr_lo = vec![F::default(); 1 << lr_lo.len()];
        let mut scr_hi = vec![F::default(); 1 << lr_hi.len()];
        rayon::join(
            || Self::roots_of_unity_recursive(&mut scr_lo, lr_lo),
            || Self::roots_of_unity_recursive(&mut scr_hi, lr_hi),
        );
        // Tensor the two halves: out[k * |lo| + j] = scr_hi[k] * scr_lo[j].
        out.par_chunks_mut(scr_lo.len()).zip(&scr_hi).for_each(|(out_chunk, scr_hi)| {
            for (out_elem, scr_lo) in out_chunk.iter_mut().zip(&scr_lo) {
                *out_elem = *scr_hi * scr_lo;
            }
        });
    }

    /// Decimation-in-frequency butterfly:
    /// `(lo, hi) <- (lo + hi, (lo - hi) * root)`.
    #[inline(always)]
    fn butterfly_fn_io<T: DomainCoeff<F>>(((lo, hi), root): ((&mut T, &mut T), &F)) {
        let neg = *lo - *hi;
        *lo += *hi;
        *hi = neg;
        *hi *= *root;
    }

    /// Decimation-in-time butterfly:
    /// `(lo, hi) <- (lo + hi * root, lo - hi * root)`.
    #[inline(always)]
    fn butterfly_fn_oi<T: DomainCoeff<F>>(((lo, hi), root): ((&mut T, &mut T), &F)) {
        *hi *= *root;
        let neg = *lo - *hi;
        *lo += *hi;
        *hi = neg;
    }

    /// Applies butterfly `g` across `xi`: the slice is cut into `num_chunks`
    /// chunks of `chunk_size`, each chunk is split at `gap` into lo/hi halves,
    /// and each (lo, hi) pair is combined with a root taken every `step`
    /// entries of `roots`. Work inside a chunk is only parallelized when the
    /// gap is large and there are fewer chunks than available threads.
    #[allow(clippy::too_many_arguments)]
    fn apply_butterfly<T: DomainCoeff<F>, G: Fn(((&mut T, &mut T), &F)) + Copy + Sync + Send>(
        g: G,
        xi: &mut [T],
        roots: &[F],
        step: usize,
        chunk_size: usize,
        num_chunks: usize,
        max_threads: usize,
        gap: usize,
    ) {
        cfg_chunks_mut!(xi, chunk_size).for_each(|cxi| {
            let (lo, hi) = cxi.split_at_mut(gap);
            if gap > MIN_GAP_SIZE_FOR_PARALLELISATION && num_chunks < max_threads {
                cfg_iter_mut!(lo, 1000).zip(hi).zip(cfg_iter!(roots, 1000).step_by(step)).for_each(g);
            } else {
                lo.iter_mut().zip(hi).zip(roots.iter().step_by(step)).for_each(g);
            }
        });
    }

    /// Runs the decimation-in-frequency passes: natural-order input,
    /// bit-reversed output. The gap halves each round; once there are many
    /// chunks, the strided roots are compacted into a contiguous buffer so
    /// later rounds read them with unit stride.
    #[allow(clippy::unnecessary_to_owned)]
    fn io_helper_with_roots<T: DomainCoeff<F>>(&self, xi: &mut [T], roots: &[F]) {
        let mut roots = std::borrow::Cow::Borrowed(roots);

        let mut step = 1;
        let mut first = true;

        #[cfg(not(feature = "serial"))]
        let max_threads = snarkvm_utilities::parallel::max_available_threads();
        #[cfg(feature = "serial")]
        let max_threads = 1;

        let mut gap = xi.len() / 2;
        while gap > 0 {
            // Each butterfly pairs element i with element i + gap.
            let chunk_size = 2 * gap;
            let num_chunks = xi.len() / chunk_size;

            if num_chunks >= MIN_NUM_CHUNKS_FOR_COMPACTION {
                // Keep only every (step * 2)-th root, contiguously.
                if !first {
                    roots = Cow::Owned(cfg_into_iter!(roots.into_owned()).step_by(step * 2).collect());
                }
                step = 1;
                roots.to_mut().shrink_to_fit();
            } else {
                step = num_chunks;
            }
            first = false;

            Self::apply_butterfly(
                Self::butterfly_fn_io,
                xi,
                &roots[..],
                step,
                chunk_size,
                num_chunks,
                max_threads,
                gap,
            );

            gap /= 2;
        }
    }

    /// Runs the decimation-in-time passes: bit-reversed input, natural-order
    /// output. The gap doubles each round; when there are many chunks, the
    /// strided roots are first copied into a contiguous scratch buffer.
    fn oi_helper_with_roots<T: DomainCoeff<F>>(&self, xi: &mut [T], roots_cache: &[F]) {
        // Scratch buffer large enough for any round that benefits from compaction.
        let compaction_max_size =
            core::cmp::min(roots_cache.len() / 2, roots_cache.len() / MIN_NUM_CHUNKS_FOR_COMPACTION);
        let mut compacted_roots = vec![F::default(); compaction_max_size];

        #[cfg(not(feature = "serial"))]
        let max_threads = snarkvm_utilities::parallel::max_available_threads();
        #[cfg(feature = "serial")]
        let max_threads = 1;

        let mut gap = 1;
        while gap < xi.len() {
            let chunk_size = 2 * gap;
            let num_chunks = xi.len() / chunk_size;

            let (roots, step) = if num_chunks >= MIN_NUM_CHUNKS_FOR_COMPACTION && gap < xi.len() / 2 {
                // Copy every `num_chunks`-th root into contiguous scratch space.
                cfg_iter_mut!(compacted_roots[..gap], 1_000)
                    .zip(cfg_iter!(roots_cache[..(gap * num_chunks)], 1_000).step_by(num_chunks))
                    .for_each(|(a, b)| *a = *b);
                (&compacted_roots[..gap], 1)
            } else {
                (roots_cache, num_chunks)
            };

            Self::apply_butterfly(Self::butterfly_fn_oi, xi, roots, step, chunk_size, num_chunks, max_threads, gap);

            gap *= 2;
        }
    }
}
778
/// Minimum number of chunks at which the (i)FFT helpers compact the strided
/// roots into a contiguous buffer for better cache locality.
const MIN_NUM_CHUNKS_FOR_COMPACTION: usize = 1 << 7;

/// Minimum butterfly gap at which work *within* a single chunk is parallelized.
const MIN_GAP_SIZE_FOR_PARALLELISATION: usize = 1 << 10;

/// Base-2 log of the domain size above which `roots_of_unity` switches from
/// the serial running-product to the parallel recursive algorithm.
#[cfg(not(feature = "serial"))]
const LOG_ROOTS_OF_UNITY_PARALLEL_SIZE: u32 = 7;
790
791#[inline]
792pub(super) fn bitrev(a: u64, log_len: u32) -> u64 {
793 a.reverse_bits() >> (64 - log_len)
794}
795
796pub(crate) fn derange<T>(xi: &mut [T]) {
797 derange_helper(xi, log2(xi.len()))
798}
799
/// Permutes `xi` in place into bit-reversed index order.
///
/// `log_len` must equal `log2(xi.len())` for a power-of-two-length slice.
fn derange_helper<T>(xi: &mut [T], log_len: u32) {
    // Slices of length <= 2 are fixed points of the permutation; returning
    // early also prevents the `len - 1` underflow on an empty slice (a debug
    // panic, or a wild index range in release builds).
    if xi.len() <= 2 {
        return;
    }
    // Index 0 and the all-ones index `len - 1` are their own bit-reversals,
    // so they can be skipped.
    for idx in 1..(xi.len() as u64 - 1) {
        // Reverse the lowest `log_len` bits of `idx`.
        let ridx = idx.reverse_bits() >> (u64::BITS - log_len);
        // Swap each pair exactly once (idx == ridx needs no work).
        if idx < ridx {
            xi.swap(idx as usize, ridx as usize);
        }
    }
}
808
/// The ordering assumed for the input and output of an (i)FFT helper pass.
#[derive(PartialEq, Eq, Debug)]
enum FFTOrder {
    /// Input and output are both in natural order
    /// (an extra bit-reversal pass is applied internally).
    II,
    /// Input is in natural order; output is left in bit-reversed order.
    IO,
    /// Input is expected in bit-reversed order; output is in natural order.
    OI,
}
820
821pub(crate) fn compute_powers_serial<F: Field>(size: usize, root: F) -> Vec<F> {
822 compute_powers_and_mul_by_const_serial(size, root, F::one())
823}
824
825pub(crate) fn compute_powers_and_mul_by_const_serial<F: Field>(size: usize, root: F, c: F) -> Vec<F> {
826 let mut value = c;
827 (0..size)
828 .map(|_| {
829 let old_value = value;
830 value *= root;
831 old_value
832 })
833 .collect()
834}
835
836#[allow(unused)]
837#[cfg(not(feature = "serial"))]
838pub(crate) fn compute_powers<F: Field>(size: usize, g: F) -> Vec<F> {
839 if size < MIN_PARALLEL_CHUNK_SIZE {
840 return compute_powers_serial(size, g);
841 }
842 let num_cpus_available = max_available_threads();
844 let num_elem_per_thread = core::cmp::max(size / num_cpus_available, MIN_PARALLEL_CHUNK_SIZE);
845 let num_cpus_used = size / num_elem_per_thread;
846
847 let res: Vec<F> = (0..num_cpus_used)
849 .into_par_iter()
850 .flat_map(|i| {
851 let offset = g.pow([(i * num_elem_per_thread) as u64]);
852 let num_elements_to_compute = core::cmp::min(size - i * num_elem_per_thread, num_elem_per_thread);
856 compute_powers_and_mul_by_const_serial(num_elements_to_compute, g, offset)
857 })
858 .collect();
859 res
860}
861
/// An iterator over the elements of a multiplicative subgroup, produced by
/// [`EvaluationDomain::elements`].
#[derive(Clone)]
pub struct Elements<F: FftField> {
    // The current element, equal to `group_gen^cur_pow`.
    cur_elem: F,
    // How many elements have been yielded so far.
    cur_pow: u64,
    // The domain being iterated over.
    domain: EvaluationDomain<F>,
}
869
870impl<F: FftField> Iterator for Elements<F> {
871 type Item = F;
872
873 fn next(&mut self) -> Option<F> {
874 if self.cur_pow == self.domain.size {
875 None
876 } else {
877 let cur_elem = self.cur_elem;
878 self.cur_elem *= &self.domain.group_gen;
879 self.cur_pow += 1;
880 Some(cur_elem)
881 }
882 }
883}
884
/// A precomputed table of roots of unity used by the forward FFT over a
/// particular domain.
#[derive(Clone, Eq, PartialEq, Debug, CanonicalDeserialize, CanonicalSerialize)]
pub struct FFTPrecomputation<F: FftField> {
    // The first `domain.size() / 2` powers of the domain generator.
    roots: Vec<F>,
    // The domain these roots were computed for.
    domain: EvaluationDomain<F>,
}
891
892impl<F: FftField> FFTPrecomputation<F> {
893 pub fn to_ifft_precomputation(&self) -> IFFTPrecomputation<F> {
894 let mut inverse_roots = self.roots.clone();
895 snarkvm_fields::batch_inversion(&mut inverse_roots);
896 IFFTPrecomputation { inverse_roots, domain: self.domain }
897 }
898
899 pub fn precomputation_for_subdomain<'a>(&'a self, domain: &EvaluationDomain<F>) -> Option<Cow<'a, Self>> {
900 if domain.size() == 1 {
901 return Some(Cow::Owned(Self { roots: vec![], domain: *domain }));
902 }
903 if &self.domain == domain {
904 Some(Cow::Borrowed(self))
905 } else if domain.size() < self.domain.size() {
906 let size_ratio = self.domain.size() / domain.size();
907 let roots = self.roots.iter().step_by(size_ratio).copied().collect();
908 Some(Cow::Owned(Self { roots, domain: *domain }))
909 } else {
910 None
911 }
912 }
913}
914
/// A precomputed table of inverse roots of unity used by the inverse FFT
/// over a particular domain.
#[derive(Clone, Eq, PartialEq, Debug, CanonicalSerialize, CanonicalDeserialize)]
pub struct IFFTPrecomputation<F: FftField> {
    // The first `domain.size() / 2` powers of the inverse domain generator.
    inverse_roots: Vec<F>,
    // The domain these roots were computed for.
    domain: EvaluationDomain<F>,
}
921
922impl<F: FftField> IFFTPrecomputation<F> {
923 pub fn precomputation_for_subdomain<'a>(&'a self, domain: &EvaluationDomain<F>) -> Option<Cow<'a, Self>> {
924 if domain.size() == 1 {
925 return Some(Cow::Owned(Self { inverse_roots: vec![], domain: *domain }));
926 }
927 if &self.domain == domain {
928 Some(Cow::Borrowed(self))
929 } else if domain.size() < self.domain.size() {
930 let size_ratio = self.domain.size() / domain.size();
931 let inverse_roots = self.inverse_roots.iter().step_by(size_ratio).copied().collect();
932 Some(Cow::Owned(Self { inverse_roots, domain: *domain }))
933 } else {
934 None
935 }
936 }
937}
938
#[cfg(test)]
mod tests {
    #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
    use crate::fft::domain::FFTOrder;
    use crate::fft::{DensePolynomial, EvaluationDomain};
    use rand::Rng;
    use snarkvm_curves::bls12_377::Fr;
    use snarkvm_fields::{FftField, Field, One, Zero};
    use snarkvm_utilities::{TestRng, Uniform};

    // The sparse vanishing polynomial must agree with the direct
    // `tau^size - 1` evaluation at random points.
    #[test]
    fn vanishing_polynomial_evaluation() {
        let rng = &mut TestRng::default();
        for coeffs in 0..10 {
            let domain = EvaluationDomain::<Fr>::new(coeffs).unwrap();
            let z = domain.vanishing_polynomial();
            for _ in 0..100 {
                let point = rng.r#gen();
                assert_eq!(z.evaluate(point), domain.evaluate_vanishing_polynomial(point))
            }
        }
    }

    // The vanishing polynomial must evaluate to zero on every domain element.
    #[test]
    fn vanishing_polynomial_vanishes_on_domain() {
        for coeffs in 0..1000 {
            let domain = EvaluationDomain::<Fr>::new(coeffs).unwrap();
            let z = domain.vanishing_polynomial();
            for point in domain.elements() {
                assert!(z.evaluate(point).is_zero())
            }
        }
    }

    // The `elements` iterator must yield exactly `domain.size()` elements.
    #[test]
    fn size_of_elements() {
        for coeffs in 1..10 {
            let size = 1 << coeffs;
            let domain = EvaluationDomain::<Fr>::new(size).unwrap();
            let domain_size = domain.size();
            assert_eq!(domain_size, domain.elements().count());
        }
    }

    // The i-th element yielded must be `group_gen^i`.
    #[test]
    fn elements_contents() {
        for coeffs in 1..10 {
            let size = 1 << coeffs;
            let domain = EvaluationDomain::<Fr>::new(size).unwrap();
            for (i, element) in domain.elements().enumerate() {
                assert_eq!(element, domain.group_gen.pow([i as u64]));
            }
        }
    }

    // Interpolating a random polynomial's evaluations with the Lagrange
    // coefficients at a random (out-of-domain) point must reproduce the
    // polynomial's value there.
    #[test]
    fn non_systematic_lagrange_coefficients_test() {
        let mut rng = TestRng::default();
        for domain_dimension in 1..10 {
            let domain_size = 1 << domain_dimension;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let random_point = Fr::rand(&mut rng);
            let lagrange_coefficients = domain.evaluate_all_lagrange_coefficients(random_point);

            let random_polynomial = DensePolynomial::<Fr>::rand(domain_size - 1, &mut rng);
            let polynomial_evaluations = domain.fft(random_polynomial.coeffs());
            let actual_evaluations = random_polynomial.evaluate(random_point);

            // Sum up the product of the Lagrange coefficients and the evaluations.
            let mut interpolated_evaluation = Fr::zero();
            for i in 0..domain_size {
                interpolated_evaluation += lagrange_coefficients[i] * polynomial_evaluations[i];
            }
            assert_eq!(actual_evaluations, interpolated_evaluation);
        }
    }

    // Evaluating the Lagrange coefficients AT a domain element must yield an
    // indicator vector: 1 at that element's index and 0 everywhere else.
    #[test]
    fn systematic_lagrange_coefficients_test() {
        for domain_dimension in 1..5 {
            let domain_size = 1 << domain_dimension;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let all_domain_elements: Vec<Fr> = domain.elements().collect();
            for (i, domain_element) in all_domain_elements.iter().enumerate().take(domain_size) {
                let lagrange_coefficients = domain.evaluate_all_lagrange_coefficients(*domain_element);
                for (j, lagrange_coefficient) in lagrange_coefficients.iter().enumerate().take(domain_size) {
                    if i == j {
                        assert_eq!(*lagrange_coefficient, Fr::one());
                    } else {
                        assert_eq!(*lagrange_coefficient, Fr::zero());
                    }
                }
            }
        }
    }

    // `roots_of_unity` must produce the first half of the domain's elements,
    // all of which vanish on the vanishing polynomial.
    #[test]
    fn test_roots_of_unity() {
        let max_degree = 10;
        for log_domain_size in 0..max_degree {
            let domain_size = 1 << log_domain_size;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let actual_roots = domain.roots_of_unity(domain.group_gen);
            for &value in &actual_roots {
                assert!(domain.evaluate_vanishing_polynomial(value).is_zero());
            }
            let expected_roots_elements = domain.elements();
            for (expected, &actual) in expected_roots_elements.zip(&actual_roots) {
                assert_eq!(expected, actual);
            }
            // Only the first size/2 roots are computed (the rest are negations).
            assert_eq!(actual_roots.len(), domain_size / 2);
        }
    }

    // FFT/iFFT and coset FFT/iFFT must be mutually inverse, and the forward
    // transforms must agree with direct polynomial evaluation.
    #[test]
    fn test_fft_correctness() {
        let mut rng = TestRng::default();

        let log_degree = 5;
        let degree = 1 << log_degree;
        let random_polynomial = DensePolynomial::<Fr>::rand(degree - 1, &mut rng);

        // Also test with a domain strictly larger than the degree bound.
        for log_domain_size in log_degree..(log_degree + 2) {
            let domain_size = 1 << log_domain_size;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let polynomial_evaluations = domain.fft(&random_polynomial.coeffs);
            let polynomial_coset_evaluations = domain.coset_fft(&random_polynomial.coeffs);
            for (i, x) in domain.elements().enumerate() {
                let coset_x = Fr::multiplicative_generator() * x;

                assert_eq!(polynomial_evaluations[i], random_polynomial.evaluate(x));
                assert_eq!(polynomial_coset_evaluations[i], random_polynomial.evaluate(coset_x));
            }

            let randon_polynomial_from_subgroup =
                DensePolynomial::from_coefficients_vec(domain.ifft(&polynomial_evaluations));
            let random_polynomial_from_coset =
                DensePolynomial::from_coefficients_vec(domain.coset_ifft(&polynomial_coset_evaluations));

            assert_eq!(
                random_polynomial, randon_polynomial_from_subgroup,
                "degree = {degree}, domain size = {domain_size}"
            );
            assert_eq!(
                random_polynomial, random_polynomial_from_coset,
                "degree = {degree}, domain size = {domain_size}"
            );
        }
    }

    // Slicing a big domain's FFT precomputation down to a subdomain must
    // match computing the subdomain's precomputation from scratch.
    #[test]
    fn test_fft_precomputation() {
        for i in 1..10 {
            let big_domain = EvaluationDomain::<Fr>::new(i).unwrap();
            let pc = big_domain.precompute_fft();
            for j in 1..i {
                let small_domain = EvaluationDomain::<Fr>::new(j).unwrap();
                let small_pc = small_domain.precompute_fft();
                assert_eq!(pc.precomputation_for_subdomain(&small_domain).unwrap().as_ref(), &small_pc);
            }
        }
    }

    // Same as above, but for the inverse-FFT precomputation.
    #[test]
    fn test_ifft_precomputation() {
        for i in 1..10 {
            let big_domain = EvaluationDomain::<Fr>::new(i).unwrap();
            let pc = big_domain.precompute_ifft();
            for j in 1..i {
                let small_domain = EvaluationDomain::<Fr>::new(j).unwrap();
                let small_pc = small_domain.precompute_ifft();
                assert_eq!(pc.precomputation_for_subdomain(&small_domain).unwrap().as_ref(), &small_pc);
            }
        }
    }

    // Inverting an FFT precomputation must yield the directly-computed
    // iFFT precomputation.
    #[test]
    fn test_ifft_precomputation_from_fft() {
        for i in 1..10 {
            let domain = EvaluationDomain::<Fr>::new(i).unwrap();
            let pc = domain.precompute_ifft();
            let fft_pc = domain.precompute_fft();
            assert_eq!(pc, fft_pc.to_ifft_precomputation())
        }
    }

    // Cross-checks the CUDA NTT kernels (standard and coset, forward and
    // inverse) against the CPU FFT helpers.
    #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
    #[test]
    fn test_fft_correctness_cuda() {
        let mut rng = TestRng::default();
        for log_domain in 2..20 {
            println!("Testing domain size {log_domain}");
            let domain_size = 1 << log_domain;
            let random_polynomial = DensePolynomial::<Fr>::rand(domain_size - 1, &mut rng);
            let mut polynomial_evaluations = random_polynomial.coeffs.clone();
            let mut polynomial_evaluations_cuda = random_polynomial.coeffs.clone();

            // CPU reference: forward FFT in natural order.
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let pc = domain.precompute_fft();
            domain.fft_helper_in_place_with_pc(&mut polynomial_evaluations, FFTOrder::II, &pc);

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Standard,
            )
            .is_err()
            {
                println!("cuda error!");
            }

            assert_eq!(polynomial_evaluations, polynomial_evaluations_cuda, "domain size = {domain_size}");

            // The inverse CUDA NTT must restore the original coefficients.
            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Standard,
            )
            .is_err()
            {
                println!("cuda error!");
            }
            assert_eq!(random_polynomial.coeffs, polynomial_evaluations_cuda, "domain size = {domain_size}");

            // CPU reference for the coset transform: shift then plain FFT.
            polynomial_evaluations = random_polynomial.coeffs.clone();
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let pc = domain.precompute_fft();
            EvaluationDomain::<Fr>::distribute_powers(&mut polynomial_evaluations, Fr::multiplicative_generator());
            domain.fft_helper_in_place_with_pc(&mut polynomial_evaluations, FFTOrder::II, &pc);

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Coset,
            )
            .is_err()
            {
                println!("cuda error!");
            }

            assert_eq!(polynomial_evaluations, polynomial_evaluations_cuda, "domain size = {domain_size}");

            // The inverse coset CUDA NTT must restore the original coefficients.
            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Coset,
            )
            .is_err()
            {
                println!("cuda error!");
            }
            assert_eq!(random_polynomial.coeffs, polynomial_evaluations_cuda, "domain size = {domain_size}");
        }
    }
}