use crate::algorithms::unity_root::*;
use crate::divisibility::{DivisibilityRingStore, DivisibilityRing};
use crate::primitive_int::*;
use crate::mempool::*;
use crate::rings::zn::*;
use crate::vector::SwappableVectorViewMut;
use crate::ring::*;
use crate::vector::VectorViewMut;
use crate::homomorphism::*;
use crate::algorithms::fft::*;
use crate::algorithms;
use crate::rings::float_complex::*;
use super::complex_fft::*;
pub struct FFTTableCooleyTuckey<R>
where R: RingStore
{
ring: R,
root_of_unity: El<R>,
log2_n: usize,
root_of_unity_list: Vec<El<R>>,
inv_root_of_unity_list: Vec<El<R>>
}
pub fn bitreverse(index: usize, bits: usize) -> usize {
index.reverse_bits().checked_shr(usize::BITS - bits as u32).unwrap_or(0)
}
impl<R> FFTTableCooleyTuckey<R>
where R: DivisibilityRingStore,
R::Type: DivisibilityRing
{
pub fn new(ring: R, root_of_unity: El<R>, log2_n: usize) -> Self {
let mut root_of_unity_pow = |i: i64| if i >= 0 {
ring.pow(ring.clone_el(&root_of_unity), i as usize)
} else {
ring.invert(&ring.pow(ring.clone_el(&root_of_unity), (-i) as usize)).unwrap()
};
assert!(ring.is_commutative());
assert!(!ring.get_ring().is_approximate());
assert!(ring.get_ring().is_approximate() || is_prim_root_of_unity_pow2(&ring, &root_of_unity_pow(1), log2_n));
let root_of_unity_list = Self::create_root_of_unity_list(&ring, &mut root_of_unity_pow, log2_n);
let inv_root_of_unity_list = Self::create_root_of_unity_list(&ring, |i| root_of_unity_pow(-i), log2_n);
let root_of_unity = root_of_unity_pow(1);
FFTTableCooleyTuckey { ring, root_of_unity, log2_n, root_of_unity_list, inv_root_of_unity_list }
}
pub fn new_with_pows<F>(ring: R, mut root_of_unity_pow: F, log2_n: usize) -> Self
where F: FnMut(i64) -> El<R>
{
assert!(ring.is_commutative());
assert!(log2_n > 0);
assert!(ring.get_ring().is_approximate() || is_prim_root_of_unity_pow2(&ring, &root_of_unity_pow(1), log2_n));
let root_of_unity_list = Self::create_root_of_unity_list(&ring, &mut root_of_unity_pow, log2_n);
let inv_root_of_unity_list = Self::create_root_of_unity_list(&ring, |i| root_of_unity_pow(-i), log2_n);
let root_of_unity = root_of_unity_pow(1);
FFTTableCooleyTuckey { ring, root_of_unity, log2_n, root_of_unity_list, inv_root_of_unity_list }
}
fn create_root_of_unity_list<F>(ring: &R, mut root_of_unity_pow: F, log2_n: usize) -> Vec<El<R>>
where F: FnMut(i64) -> El<R>
{
let mut root_of_unity_list = AllocatingMemoryProvider.get_new_init(1 << log2_n, |_| ring.zero());
let mut index = 0;
for s in 0..log2_n {
let m = 1 << s;
let log2_group_size = log2_n - s;
for i_bitreverse in (0..(1 << log2_group_size)).step_by(2) {
let current_twiddle = root_of_unity_pow(m * bitreverse(i_bitreverse, log2_group_size) as i64);
root_of_unity_list[index] = current_twiddle;
index += 1;
}
}
assert_eq!(index, (1 << log2_n) - 1);
return root_of_unity_list;
}
pub fn len(&self) -> usize {
1 << self.log2_n
}
pub fn ring(&self) -> &R {
&self.ring
}
pub fn for_zn(ring: R, log2_n: usize) -> Option<Self>
where R: ZnRingStore,
R::Type: ZnRing,
<R::Type as ZnRing>::IntegerRingBase: CanHomFrom<StaticRingBase<i64>>
{
let root_of_unity = algorithms::unity_root::get_prim_root_of_unity_pow2(&ring, log2_n)?;
Some(Self::new(ring, root_of_unity, log2_n))
}
pub fn for_complex(ring: R, log2_n: usize) -> Self
where R: RingStore<Type = Complex64Base>
{
let CC = Complex64::RING;
Self::new_with_pows(ring, |i| CC.root_of_unity(i, 1 << log2_n), log2_n)
}
pub fn bitreverse_permute_inplace<V, T>(&self, mut values: V)
where V: SwappableVectorViewMut<T>
{
assert!(values.len() == 1 << self.log2_n);
for i in 0..(1 << self.log2_n) {
if bitreverse(i, self.log2_n) < i {
values.swap(i, bitreverse(i, self.log2_n));
}
}
}
}
impl<R> PartialEq for FFTTableCooleyTuckey<R>
where R: DivisibilityRingStore,
R::Type: DivisibilityRing
{
fn eq(&self, other: &Self) -> bool {
self.ring().get_ring() == other.ring().get_ring() &&
self.log2_n == other.log2_n &&
self.ring().eq_el(self.root_of_unity(), other.root_of_unity())
}
}
pub trait CooleyTuckeyButterfly<S>: RingBase
where S: ?Sized + RingBase
{
fn butterfly<V: VectorViewMut<Self::Element>, H: Homomorphism<S, Self>>(&self, hom: &H, values: &mut V, twiddle: &S::Element, i1: usize, i2: usize);
fn inv_butterfly<V: VectorViewMut<Self::Element>, H: Homomorphism<S, Self>>(&self, hom: &H, values: &mut V, twiddle: &S::Element, i1: usize, i2: usize);
}
impl<R, S> CooleyTuckeyButterfly<S> for R
where S: ?Sized + RingBase, R: ?Sized + RingBase
{
#[inline(always)]
default fn butterfly<V: VectorViewMut<Self::Element>, H: Homomorphism<S, Self>>(&self, hom: &H, values: &mut V, twiddle: &<S as RingBase>::Element, i1: usize, i2: usize) {
hom.mul_assign_map_ref(values.at_mut(i2), twiddle);
let new_a = self.add_ref(values.at(i1), values.at(i2));
let a = std::mem::replace(values.at_mut(i1), new_a);
self.sub_self_assign(values.at_mut(i2), a);
}
#[inline(always)]
default fn inv_butterfly<V: VectorViewMut<Self::Element>, H: Homomorphism<S, Self>>(&self, hom: &H, values: &mut V, twiddle: &<S as RingBase>::Element, i1: usize, i2: usize) {
let new_a = self.add_ref(values.at(i1), values.at(i2));
let a = std::mem::replace(values.at_mut(i1), new_a);
self.sub_self_assign(values.at_mut(i2), a);
hom.mul_assign_map_ref(values.at_mut(i2), twiddle);
}
}
impl<R> FFTTableCooleyTuckey<R>
where R: DivisibilityRingStore,
R::Type: DivisibilityRing
{
fn unordered_fft_dispatch<V, S, H, const INV: bool>(&self, values: &mut V, hom: &H)
where S: ?Sized + RingBase,
H: Homomorphism<R::Type, S>,
V: VectorViewMut<S::Element>
{
assert!(values.len() == (1 << self.log2_n));
assert!(hom.domain().get_ring() == self.ring().get_ring());
debug_assert!(hom.codomain().get_ring().is_approximate() || is_prim_root_of_unity_pow2(&hom.codomain(), &hom.map_ref(&self.root_of_unity), self.log2_n));
let R = hom.codomain();
for step in 0..self.log2_n {
let (log2_m, log2_group_size_half) = if !INV {
(self.log2_n - step - 1, step)
} else {
(step, self.log2_n - step - 1)
};
let group_size_half = 1 << log2_group_size_half;
let m = 1 << log2_m;
let two_m = 2 << log2_m;
const UNROLL_COUNT: usize = 4;
if group_size_half < UNROLL_COUNT {
for k in 0..(1 << log2_m) {
let mut root_of_unity_index = (1 << self.log2_n) - 2 * group_size_half;
for i_bitreverse in 0..group_size_half {
let index1 = i_bitreverse * two_m + k;
let index2 = index1 + m;
if !INV {
let current_twiddle = &self.inv_root_of_unity_list[root_of_unity_index];
R.get_ring().butterfly(hom, values, current_twiddle, index1, index2);
} else {
let current_twiddle = &self.root_of_unity_list[root_of_unity_index];
R.get_ring().inv_butterfly(hom, values, current_twiddle, index1, index2);
}
root_of_unity_index += 1;
}
}
} else {
for k in 0..m {
let mut root_of_unity_index = (1 << self.log2_n) - 2 * group_size_half;
let mut index1 = k;
for _ in (0..group_size_half).step_by(UNROLL_COUNT) {
for _ in 0..UNROLL_COUNT {
if !INV {
let current_twiddle = &self.inv_root_of_unity_list[root_of_unity_index];
R.get_ring().butterfly(hom, values, current_twiddle, index1, index1 + m);
} else {
let current_twiddle = &self.root_of_unity_list[root_of_unity_index];
R.get_ring().inv_butterfly(hom, values, current_twiddle, index1, index1 + m);
}
root_of_unity_index += 1;
index1 += two_m;
}
}
}
}
}
}
}
impl<R> FFTTable for FFTTableCooleyTuckey<R>
where R: DivisibilityRingStore,
R::Type: DivisibilityRing
{
type Ring = R;
fn len(&self) -> usize {
1 << self.log2_n
}
fn ring(&self) -> &R {
&self.ring
}
fn root_of_unity(&self) -> &El<R> {
&self.root_of_unity
}
fn unordered_fft_permutation(&self, i: usize) -> usize {
bitreverse(i, self.log2_n)
}
fn unordered_fft_permutation_inv(&self, i: usize) -> usize {
bitreverse(i, self.log2_n)
}
fn fft<V, S, M, H>(&self, mut values: V, memory_provider: &M, hom: &H)
where S: ?Sized + RingBase,
H: Homomorphism<<Self::Ring as RingStore>::Type, S>,
V: SwappableVectorViewMut<S::Element>,
M: MemoryProvider<S::Element>
{
self.unordered_fft(&mut values, memory_provider, hom);
self.bitreverse_permute_inplace(&mut values);
}
fn inv_fft<V, S, M, H>(&self, mut values: V, memory_provider: &M, hom: &H)
where S: ?Sized + RingBase,
H: Homomorphism<<Self::Ring as RingStore>::Type, S>,
V: SwappableVectorViewMut<S::Element>,
M: MemoryProvider<S::Element>
{
self.bitreverse_permute_inplace(&mut values);
self.unordered_inv_fft(&mut values, memory_provider, hom);
}
fn unordered_fft<V, S, M, H>(&self, mut values: V, _memory_provider: &M, hom: &H)
where S: ?Sized + RingBase,
H: Homomorphism<<Self::Ring as RingStore>::Type, S>,
V: VectorViewMut<S::Element>,
M: MemoryProvider<S::Element>
{
self.unordered_fft_dispatch::<V, S, H, false>(&mut values, hom);
}
fn unordered_inv_fft<V, S, M, H>(&self, mut values: V, _memory_provider: &M, hom: &H)
where S: ?Sized + RingBase,
H: Homomorphism<<Self::Ring as RingStore>::Type, S>,
V: VectorViewMut<S::Element>,
M: MemoryProvider<S::Element>
{
self.unordered_fft_dispatch::<V, S, H, true>(&mut values, hom);
let inv = hom.map(self.ring.invert(&self.ring.int_hom().map(1 << self.log2_n)).unwrap());
for i in 0..values.len() {
hom.codomain().mul_assign_ref(values.at_mut(i), &inv);
}
}
}
impl<R: RingStore<Type = Complex64Base>> ErrorEstimate for FFTTableCooleyTuckey<R> {
fn expected_absolute_error(&self, input_bound: f64, input_error: f64) -> f64 {
let butterfly_absolute_error = input_bound * (root_of_unity_error() + f64::EPSILON);
return 2. * self.len() as f64 * butterfly_absolute_error + self.len() as f64 * input_error;
}
}
#[cfg(test)]
use crate::rings::zn::zn_static::Fp;
#[cfg(test)]
use crate::rings::zn::zn_barett;
#[cfg(test)]
use crate::rings::zn::zn_static;
#[cfg(test)]
use crate::field::*;
#[cfg(test)]
use crate::rings::finite::FiniteRingStore;
#[cfg(test)]
use crate::default_memory_provider;
#[test]
fn test_bitreverse_fft_inplace_basic() {
let ring = Fp::<5>::RING;
let z = ring.int_hom().map(2);
let fft = FFTTableCooleyTuckey::new(ring, ring.div(&1, &z), 2);
let mut values = [1, 0, 0, 1];
let expected = [2, 4, 0, 3];
let mut bitreverse_expected = [0; 4];
for i in 0..4 {
bitreverse_expected[i] = expected[bitreverse(i, 2)];
}
fft.unordered_fft(&mut values, &default_memory_provider!(), &fft.ring().identity());
assert_eq!(values, bitreverse_expected);
}
#[test]
fn test_bitreverse_fft_inplace_advanced() {
let ring = Fp::<17>::RING;
let z = ring.int_hom().map(3);
let fft = FFTTableCooleyTuckey::new(ring, z, 4);
let mut values = [1, 0, 0, 0, 1, 0, 0, 0, 4, 3, 2, 1, 4, 3, 2, 1];
let expected = [5, 2, 0, 11, 5, 4, 0, 6, 6, 13, 0, 1, 7, 6, 0, 1];
let mut bitreverse_expected = [0; 16];
for i in 0..16 {
bitreverse_expected[i] = expected[bitreverse(i, 4)];
}
fft.unordered_fft(&mut values, &default_memory_provider!(), &fft.ring().identity());
assert_eq!(values, bitreverse_expected);
}
#[test]
fn test_bitreverse_inv_fft_inplace() {
let ring = Fp::<17>::RING;
let fft = FFTTableCooleyTuckey::for_zn(&ring, 4).unwrap();
let values: [u64; 16] = [1, 2, 3, 2, 1, 0, 17 - 1, 17 - 2, 17 - 1, 0, 1, 2, 3, 4, 5, 6];
let mut work = values;
fft.unordered_fft(&mut work, &default_memory_provider!(), &fft.ring().identity());
fft.unordered_inv_fft(&mut work, &default_memory_provider!(), &fft.ring().identity());
assert_eq!(&work, &values);
}
#[test]
fn test_for_zn() {
let ring = Fp::<17>::RING;
let fft = FFTTableCooleyTuckey::for_zn(ring, 4).unwrap();
assert!(ring.is_neg_one(&ring.pow(fft.root_of_unity, 8)));
let ring = Fp::<97>::RING;
let fft = FFTTableCooleyTuckey::for_zn(ring, 4).unwrap();
assert!(ring.is_neg_one(&ring.pow(fft.root_of_unity, 8)));
}
#[cfg(test)]
fn run_fft_bench_round<R, S>(ring: S, fft: &FFTTableCooleyTuckey<R>, data: &Vec<El<S>>, copy: &mut Vec<El<S>>)
where R: ZnRingStore, R::Type: ZnRing, S: ZnRingStore, S::Type: ZnRing + CanHomFrom<R::Type>
{
copy.clear();
copy.extend(data.iter().map(|x| ring.clone_el(x)));
fft.unordered_fft(&mut copy[..], &default_memory_provider!(), &ring.can_hom(fft.ring()).unwrap());
fft.unordered_inv_fft(&mut copy[..], &default_memory_provider!(), &ring.can_hom(fft.ring()).unwrap());
assert_el_eq!(&ring, ©[0], &data[0]);
}
#[cfg(test)]
const BENCH_SIZE_LOG2: usize = 13;
#[bench]
fn bench_fft(bencher: &mut test::Bencher) {
let ring = zn_barett::Zn::new(StaticRing::<i128>::RING, 1073872897);
let fft = FFTTableCooleyTuckey::for_zn(&ring, BENCH_SIZE_LOG2).unwrap();
let data = (0..(1 << BENCH_SIZE_LOG2)).map(|i| ring.int_hom().map(i)).collect::<Vec<_>>();
let mut copy = Vec::with_capacity(1 << BENCH_SIZE_LOG2);
bencher.iter(|| {
run_fft_bench_round(&ring, &fft, &data, &mut copy)
});
}
#[bench]
fn bench_fft_zn64_fastmul(bencher: &mut test::Bencher) {
let ring = zn_64::Zn::new(1073872897);
let fastmul_ring = zn_64::ZnFastmul::new(ring);
let fft = FFTTableCooleyTuckey::for_zn(&fastmul_ring, BENCH_SIZE_LOG2).unwrap();
let data = (0..(1 << BENCH_SIZE_LOG2)).map(|i| ring.int_hom().map(i)).collect::<Vec<_>>();
let mut copy = Vec::with_capacity(1 << BENCH_SIZE_LOG2);
bencher.iter(|| {
run_fft_bench_round(&ring, &fft, &data, &mut copy)
});
}
#[test]
fn test_approximate_fft() {
let CC = Complex64::RING;
for log2_n in [4, 7, 11, 15] {
let fft = FFTTableCooleyTuckey::new_with_pows(CC, |x| CC.root_of_unity(x, 1 << log2_n), log2_n);
let mut array = default_memory_provider!().get_new_init(1 << log2_n, |i| CC.root_of_unity(i as i64, 1 << log2_n));
fft.fft(&mut array, &default_memory_provider!(), &CC.identity());
let err = fft.expected_absolute_error(1., 0.);
assert!(CC.is_absolute_approx_eq(array[0], CC.zero(), err));
assert!(CC.is_absolute_approx_eq(array[1], CC.from_f64(fft.len() as f64), err));
for i in 2..fft.len() {
assert!(CC.is_absolute_approx_eq(array[i], CC.zero(), err));
}
}
}
#[test]
fn test_size_1_fft() {
let ring = Fp::<17>::RING;
let fft = FFTTableCooleyTuckey::for_zn(&ring, 0).unwrap();
let values: [u64; 1] = [3];
let mut work = values;
fft.unordered_fft(&mut work, &default_memory_provider!(), &fft.ring().identity());
assert_eq!(&work, &values);
fft.unordered_inv_fft(&mut work, &default_memory_provider!(), &fft.ring().identity());
assert_eq!(&work, &values);
assert_eq!(0, fft.unordered_fft_permutation(0));
assert_eq!(0, fft.unordered_fft_permutation_inv(0));
}
#[cfg(any(test, feature = "generic_tests"))]
pub fn generic_test_cooley_tuckey_butterfly<R: RingStore, S: RingStore, I: Iterator<Item = El<R>>>(ring: R, base: S, edge_case_elements: I, test_twiddle: &El<S>)
where R::Type: CanHomFrom<S::Type>,
S::Type: DivisibilityRing
{
let test_inv_twiddle = base.invert(&test_twiddle).unwrap();
let elements = edge_case_elements.collect::<Vec<_>>();
let hom = ring.can_hom(&base).unwrap();
for a in &elements {
for b in &elements {
let mut vector = [ring.clone_el(a), ring.clone_el(b)];
ring.get_ring().butterfly(&hom, &mut vector, &test_twiddle, 0, 1);
assert_el_eq!(&ring, &ring.add_ref_fst(a, ring.mul_ref_fst(b, hom.map_ref(test_twiddle))), &vector[0]);
assert_el_eq!(&ring, &ring.sub_ref_fst(a, ring.mul_ref_fst(b, hom.map_ref(test_twiddle))), &vector[1]);
ring.get_ring().inv_butterfly(&hom, &mut vector, &test_inv_twiddle, 0, 1);
assert_el_eq!(&ring, &ring.int_hom().mul_ref_fst_map(a, 2), &vector[0]);
assert_el_eq!(&ring, &ring.int_hom().mul_ref_fst_map(b, 2), &vector[1]);
let mut vector = [ring.clone_el(a), ring.clone_el(b)];
ring.get_ring().butterfly(&hom, &mut vector, &test_twiddle, 1, 0);
assert_el_eq!(&ring, &ring.add_ref_fst(b, ring.mul_ref_fst(a, hom.map_ref(test_twiddle))), &vector[1]);
assert_el_eq!(&ring, &ring.sub_ref_fst(b, ring.mul_ref_fst(a, hom.map_ref(test_twiddle))), &vector[0]);
ring.get_ring().inv_butterfly(&hom, &mut vector, &test_inv_twiddle, 1, 0);
assert_el_eq!(&ring, &ring.int_hom().mul_ref_fst_map(a, 2), &vector[0]);
assert_el_eq!(&ring, &ring.int_hom().mul_ref_fst_map(b, 2), &vector[1]);
}
}
}
#[test]
fn test_butterfly() {
generic_test_cooley_tuckey_butterfly(zn_static::F17, zn_static::F17, zn_static::F17.elements(), &get_prim_root_of_unity_pow2(zn_static::F17, 4).unwrap());
}