use crate::numbers::*;
#[cfg(all(feature = "use_sse2", target_feature = "sse2"))]
use simd;
#[cfg(all(feature = "use_avx2", target_feature = "avx2"))]
use simd::x86::avx as simdavx;
#[cfg(all(feature = "use_sse2", target_feature = "sse2"))]
use simd::x86::sse2 as simdsse;
use std;
use std::mem;
use std::ops::*;
mod simd_partition;
pub use self::simd_partition::{EdgeIteratorMut, IndexedEdgeIteratorMut, SimdPartition};
/// Operations every SIMD register type in this module provides for lanes of
/// type `T`.
///
/// The numeric behaviour of each operation is defined by the backend
/// implementations, which live outside this file. The notes below restate
/// what the signatures show and flag anything that is only an assumption.
pub trait Simd<T: Sync + Send>: Sized {
    /// Array representation of the register's `T` elements, as returned by
    /// [`Simd::to_array`].
    type Array;

    /// Array representation of the register's content as `Complex<T>` values.
    type ComplexArray;

    /// Number of `T` elements held by one register.
    const LEN: usize;

    /// Consumes the register and returns its content as `Self::Array`.
    fn to_array(self) -> Self::Array;

    /// Creates a register from a single complex value.
    /// NOTE(review): presumably the value is repeated over the whole
    /// register -- confirm against the backend implementations.
    fn from_complex(value: Complex<T>) -> Self;

    /// Returns a register derived from `self` and the real `value`.
    /// NOTE(review): presumably an element-wise addition -- confirm.
    fn add_real(self, value: T) -> Self;

    /// Returns a register derived from `self` and the complex `value`.
    /// NOTE(review): presumably adds `value` to every complex pair -- confirm.
    fn add_complex(self, value: Complex<T>) -> Self;

    /// Returns a register derived from `self` and the real `value`.
    /// NOTE(review): presumably an element-wise multiplication -- confirm.
    fn scale_real(self, value: T) -> Self;

    /// Returns a register derived from `self` and the complex `value`.
    /// NOTE(review): presumably multiplies every complex pair by `value`
    /// -- confirm.
    fn scale_complex(self, value: Complex<T>) -> Self;

    /// NOTE(review): presumably the squared magnitude of each complex pair;
    /// the position of the results inside the returned register is
    /// backend-defined -- confirm.
    fn complex_abs_squared(self) -> Self;

    /// NOTE(review): presumably the magnitude of each complex pair; the
    /// position of the results inside the returned register is
    /// backend-defined -- confirm.
    fn complex_abs(self) -> Self;

    /// NOTE(review): presumably the element-wise square root -- confirm.
    fn sqrt(self) -> Self;

    /// Writes data from the register into `target`, addressed by `index`.
    /// NOTE(review): which half is stored and the unit of `index` are
    /// backend-defined -- confirm.
    fn store_half(self, target: &mut [T], index: usize);

    /// Combines `self` with another register.
    /// NOTE(review): presumably a complex multiplication of matching
    /// pairs -- confirm.
    fn mul_complex(self, value: Self) -> Self;

    /// Combines `self` with another register.
    /// NOTE(review): presumably a complex division of matching pairs
    /// -- confirm.
    fn div_complex(self, value: Self) -> Self;

    /// Reduces the register to a single real value.
    /// NOTE(review): presumably the sum of all elements -- confirm.
    fn sum_real(&self) -> T;

    /// Reduces the register to a single complex value.
    /// NOTE(review): presumably the sum of all complex pairs -- confirm.
    fn sum_complex(&self) -> Complex<T>;

    /// NOTE(review): presumably the element-wise maximum of `self` and
    /// `other` -- confirm.
    fn max(self, other: Self) -> Self;

    /// NOTE(review): presumably the element-wise minimum of `self` and
    /// `other` -- confirm.
    fn min(self, other: Self) -> Self;

    /// NOTE(review): presumably swaps the two components (I and Q) of every
    /// complex pair -- confirm.
    fn swap_iq(self) -> Self;
}
/// Conversion into a register-like type from a source value of type `T`.
///
/// This file only declares the trait; the available conversions are
/// provided by the implementations elsewhere.
pub trait SimdFrom<T> {
    /// Builds `Self` from `src`.
    fn regfrom(src: T) -> Self;
}
/// Register-independent interface used by generic vector code.
///
/// On top of [`Simd`] and [`SimdApproximations`] a register must support the
/// four basic arithmetic operators with itself, be freely copyable and
/// shareable between threads, and provide a zero value. The
/// `simd_generic_impl!` macro in this file generates the implementations.
pub trait SimdGeneric<T: Sync + Send>:
    Simd<T>
    + SimdApproximations<T>
    + Zero
    + Add<Output = Self>
    + Sub<Output = Self>
    + Mul<Output = Self>
    + Div<Output = Self>
    + Copy
    + Clone
    + Send
    + Sync
    + Sized
{
    /// Inspects the address and length of `array` and returns the
    /// [`SimdPartition`] describing how it is to be processed.
    fn calc_data_alignment_reqs(array: &[T]) -> SimdPartition<T>;

    /// Creates a register from its array representation.
    fn from_array(array: Self::Array) -> Self;

    /// Returns the register content as an array of complex values.
    fn to_complex_array(self) -> Self::ComplexArray;

    /// Creates a register from an array of complex values.
    fn from_complex_array(array: Self::ComplexArray) -> Self;

    /// Passes the real elements of the register through `op` and returns a
    /// register built from the results.
    fn iter_over_vector<F: FnMut(T) -> T>(self, op: F) -> Self;

    /// Passes the complex elements of the register through `op` and returns
    /// a register built from the results.
    fn iter_over_complex_vector<F: FnMut(Complex<T>) -> Complex<T>>(self, op: F) -> Self;

    /// Reinterprets a slice of scalars as a slice of registers.
    fn array_to_regs(array: &[T]) -> &[Self];

    /// Mutable counterpart of [`SimdGeneric::array_to_regs`].
    fn array_to_regs_mut(array: &mut [T]) -> &mut [Self];

    /// Reads a register from `array`, addressed by `idx`.
    fn load(array: &[T], idx: usize) -> Self;

    /// Writes the register to `array`, addressed by `index`.
    fn store(self, array: &mut [T], index: usize);

    /// Returns the element selected by `idx`.
    fn extract(self, idx: u32) -> T;

    /// Creates a register from a single scalar `value`.
    /// NOTE(review): presumably every element is set to `value` -- confirm
    /// against the register types.
    fn splat(value: T) -> Self;
}
/// Approximated math functions on a whole register.
///
/// Accuracy and valid input ranges are properties of the individual
/// implementations, which are not part of this file.
pub trait SimdApproximations<T> {
    /// Approximation of the exponential function.
    /// NOTE(review): presumably applied element-wise -- confirm.
    fn exp_approx(self) -> Self;

    /// Approximation of the natural logarithm.
    /// NOTE(review): presumably applied element-wise -- confirm.
    fn ln_approx(self) -> Self;

    /// Approximation of the sine.
    /// NOTE(review): presumably applied element-wise -- confirm.
    fn sin_approx(self) -> Self;

    /// Approximation of the cosine.
    /// NOTE(review): presumably applied element-wise -- confirm.
    fn cos_approx(self) -> Self;

    /// Shared entry point for sine and cosine.
    /// NOTE(review): presumably `is_sin == true` yields the sine and `false`
    /// the cosine -- confirm against the implementations.
    fn sin_cos_approx(self, is_sin: bool) -> Self;
}
/// Returns how many bytes `addr` lies past the previous multiple of
/// `reg_len`, i.e. `addr % reg_len`.
///
/// The result is `0` exactly when `addr` is a multiple of `reg_len`.
///
/// # Panics
///
/// Panics if `reg_len` is `0`.
fn get_alignment_offset(addr: usize, reg_len: usize) -> usize {
    let bytes_past_boundary = addr % reg_len;
    bytes_past_boundary
}
/// Implements `Zero` and `SimdGeneric<$data_type>` for the register type
/// `$mod::$reg`.
///
/// Invocation: `simd_generic_impl!(f32, some_module::f32x4);`
///
/// The generated `load`, `store`, `extract` and `splat` forward to functions
/// of the same name on the register type itself, so the register type is
/// expected to provide them as inherent functions (otherwise the calls would
/// resolve back to the trait methods and recurse).
macro_rules! simd_generic_impl {
    ($data_type:ident, $mod: ident::$reg:ident) => {
        impl Zero for $mod::$reg {
            fn zero() -> Self {
                Self::splat(0.0)
            }
        }

        impl SimdGeneric<$data_type> for $mod::$reg {
            // Splits `array` into leading scalars, register sized chunks and
            // trailing scalars.
            #[inline]
            fn calc_data_alignment_reqs(array: &[$data_type]) -> SimdPartition<$data_type> {
                let data_length = array.len();
                let reg_size = mem::size_of::<Self>();
                let elem_size = mem::size_of::<$data_type>();
                // Bytes by which the slice start lies *past* the previous
                // register boundary.
                let misalignment = get_alignment_offset(array.as_ptr() as usize, reg_size);
                assert!(misalignment % elem_size == 0);
                // The scalar elements in front of the first register must
                // cover the distance up to the *next* boundary. That distance
                // is `reg_size - misalignment`, not `misalignment` itself; the
                // two only agree when the slice starts on a boundary or
                // exactly half a register past one. The `% reg_size` maps the
                // already aligned case to 0.
                // NOTE(review): assumes `SimdPartition::new_simd` treats
                // `left` as the number of leading scalar elements -- confirm
                // in `simd_partition`.
                let left = ((reg_size - misalignment) % reg_size) / elem_size;
                if left + Self::LEN > data_length {
                    // Not even a single complete register fits.
                    SimdPartition::new_all_scalar(data_length)
                } else {
                    // Whatever does not fill a complete register at the end.
                    let right = (data_length - left) % Self::LEN;
                    SimdPartition::new_simd(left, right, data_length)
                }
            }

            #[inline]
            fn from_array(array: Self::Array) -> Self {
                Self::load(&array, 0)
            }

            #[inline]
            fn to_complex_array(self) -> Self::ComplexArray {
                // SAFETY: `transmute` only compiles if `Self::Array` and
                // `Self::ComplexArray` have the same size.
                // NOTE(review): additionally relies on `Complex<T>` being
                // laid out as two consecutive `T` values -- confirm.
                unsafe { mem::transmute(self.to_array()) }
            }

            #[inline]
            fn from_complex_array(array: Self::ComplexArray) -> Self {
                // SAFETY: inverse of `to_complex_array`, the same size and
                // layout requirements apply.
                Self::from_array(unsafe { mem::transmute(array) })
            }

            #[inline]
            fn iter_over_vector<F>(self, mut op: F) -> Self
            where
                F: FnMut($data_type) -> $data_type,
            {
                let mut array = self.to_array();
                for n in &mut array {
                    *n = op(*n);
                }
                Self::from_array(array)
            }

            #[inline]
            fn iter_over_complex_vector<F>(self, mut op: F) -> Self
            where
                F: FnMut(Complex<$data_type>) -> Complex<$data_type>,
            {
                let mut array = self.to_complex_array();
                // A register with `LEN` real elements holds `LEN / 2`
                // complex values.
                for n in &mut array[0..Self::LEN / 2] {
                    *n = op(*n);
                }
                Self::from_complex_array(array)
            }

            #[inline]
            fn array_to_regs(array: &[$data_type]) -> &[Self] {
                if array.is_empty() {
                    return &[];
                }
                // Reinterpreting the memory as registers is only done for a
                // slice which starts on a register boundary.
                assert_eq!(
                    get_alignment_offset(array.as_ptr() as usize, mem::size_of::<Self>()),
                    0
                );
                super::transmute_slice(array)
            }

            #[inline]
            fn array_to_regs_mut(array: &mut [$data_type]) -> &mut [Self] {
                if array.is_empty() {
                    return &mut [];
                }
                // Same alignment requirement as in `array_to_regs`.
                assert_eq!(
                    get_alignment_offset(array.as_ptr() as usize, mem::size_of::<Self>()),
                    0
                );
                super::transmute_slice_mut(array)
            }

            #[inline]
            fn load(array: &[$data_type], idx: usize) -> Self {
                Self::load(array, idx)
            }

            #[inline]
            fn store(self, array: &mut [$data_type], index: usize) {
                Self::store(self, array, index);
            }

            #[inline]
            fn extract(self, idx: u32) -> $data_type {
                Self::extract(self, idx)
            }

            #[inline]
            fn splat(value: $data_type) -> Self {
                Self::splat(value)
            }
        }
    };
}
// AVX-512 backend: the module and both `SimdGeneric` impls are compiled
// whenever the `use_avx512` cargo feature is enabled.
// NOTE(review): unlike the AVX2 and SSE2 impls below there is no additional
// `target_feature` condition here -- confirm that this is intended.
#[cfg(feature = "use_avx512")]
mod avx512;
#[cfg(feature = "use_avx512")]
simd_generic_impl!(f32, simd::f32x16);
#[cfg(feature = "use_avx512")]
simd_generic_impl!(f64, simd::f64x8);
// AVX2 backend: requires the `use_avx2` cargo feature and a build with the
// `avx2` target feature enabled.
#[cfg(all(feature = "use_avx2", target_feature = "avx2"))]
mod avx;
#[cfg(all(feature = "use_avx2", target_feature = "avx2"))]
simd_generic_impl!(f32, simdavx::f32x8);
#[cfg(all(feature = "use_avx2", target_feature = "avx2"))]
simd_generic_impl!(f64, simdavx::f64x4);
// SSE2 backend.
// NOTE(review): `mod sse` depends on the cargo feature only, while the
// `simdsse` import at the top of the file and the impls below also require
// the `sse2` target feature -- confirm that `sse` builds without it.
#[cfg(feature = "use_sse2")]
mod sse;
#[cfg(all(feature = "use_sse2", target_feature = "sse2"))]
simd_generic_impl!(f32, simd::f32x4);
#[cfg(all(feature = "use_sse2", target_feature = "sse2"))]
simd_generic_impl!(f64, simdsse::f64x2);
// Approximation routines which are only built with the `use_simd` feature.
#[cfg(feature = "use_simd")]
mod approximations;
// The fallback modules and their impls carry no `cfg` condition and are
// therefore part of every build.
mod approx_fallback;
pub mod fallback;
simd_generic_impl!(f32, fallback::f32x4);
simd_generic_impl!(f64, fallback::f64x2);
/// Zero-sized marker which carries a register type `Reg` as a value.
///
/// The `sel_reg!` macro passes a `RegType` as the first argument of the
/// called function so that the callee can be generic over the register type
/// which was selected.
pub struct RegType<Reg> {
    /// Ties the marker to `Reg` without storing a value of that type.
    _type: std::marker::PhantomData<Reg>,
}

impl<Reg> RegType<Reg> {
    /// Creates the marker for `Reg`.
    pub fn new() -> Self {
        RegType {
            _type: std::marker::PhantomData,
        }
    }
}

// Implemented by hand instead of derived: a derive would add a
// `Reg: Default` bound, which register types are not required to satisfy.
impl<Reg> Default for RegType<Reg> {
    /// Same as [`RegType::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Calls a function or method with the register type which matches both the
/// build configuration and the CPU the program is running on.
///
/// Two invocation forms are supported:
///
/// * `sel_reg!(receiver.method::<T>(args...))` for a method call
/// * `sel_reg!(function::<T>(args...))` for a free function call
///
/// In both forms the callee receives a `RegType<R>` marker as an additional
/// first argument, where `R` is one of the associated types `RegAvx512`,
/// `RegAvx`, `RegSse` or `RegFallback` of `<T as ToSimd>`.
///
/// A backend is only chosen if it has been compiled in *and* the CPU reports
/// the instruction set. The compile-time conditions are the same ones which
/// gate the `simd_generic_impl!` invocations in this file, and both forms
/// apply identical conditions. The `cfg!` checks come first so that a backend
/// which is not part of the build never triggers a runtime CPU query.
macro_rules! sel_reg(
    ($self_:ident.$method: ident::<$type: ident>($($args: expr),*)) => {
        if cfg!(feature = "use_avx512") && is_x86_feature_detected!("avx512vl") {
            $self_.$method(RegType::<<$type as ToSimd>::RegAvx512>::new(), $($args),*)
        } else if cfg!(feature = "use_avx2")
            && cfg!(target_feature = "avx2")
            && is_x86_feature_detected!("avx2")
        {
            $self_.$method(RegType::<<$type as ToSimd>::RegAvx>::new(), $($args),*)
        } else if cfg!(feature = "use_sse2")
            && cfg!(target_feature = "sse2")
            && is_x86_feature_detected!("sse2")
        {
            $self_.$method(RegType::<<$type as ToSimd>::RegSse>::new(), $($args),*)
        } else {
            $self_.$method(RegType::<<$type as ToSimd>::RegFallback>::new(), $($args),*)
        }
    };
    ($method: ident::<$type: ident>($($args: expr),*)) => {
        if cfg!(feature = "use_avx512") && is_x86_feature_detected!("avx512vl") {
            $method(RegType::<<$type as ToSimd>::RegAvx512>::new(), $($args),*)
        } else if cfg!(feature = "use_avx2")
            && cfg!(target_feature = "avx2")
            && is_x86_feature_detected!("avx2")
        {
            $method(RegType::<<$type as ToSimd>::RegAvx>::new(), $($args),*)
        } else if cfg!(feature = "use_sse2")
            && cfg!(target_feature = "sse2")
            && is_x86_feature_detected!("sse2")
        {
            $method(RegType::<<$type as ToSimd>::RegSse>::new(), $($args),*)
        } else {
            $method(RegType::<<$type as ToSimd>::RegFallback>::new(), $($args),*)
        }
    };
);
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the byte offsets reported for a 16 byte wide register.
    #[test]
    fn get_alignment_offset_test() {
        let reg_len = mem::size_of::<fallback::f64x2>();
        assert_eq!(reg_len, 16);
        // (address, expected offset past the previous 16 byte boundary)
        let cases = [(0, 0), (8, 8), (16, 0), (24, 8)];
        for &(addr, expected) in cases.iter() {
            assert_eq!(get_alignment_offset(addr, reg_len), expected);
        }
    }

    #[cfg(all(feature = "use_avx2", target_feature = "avx2"))]
    mod avx {
        use super::super::*;

        /// Checks the byte offsets reported for a 32 byte wide register.
        #[test]
        fn get_alignment_offset_test() {
            let reg_len = mem::size_of::<simdavx::f64x4>();
            assert_eq!(reg_len, 32);
            // (address, expected offset past the previous 32 byte boundary)
            let cases = [(0, 0), (8, 8), (16, 16), (24, 24), (32, 0), (40, 8)];
            for &(addr, expected) in cases.iter() {
                assert_eq!(get_alignment_offset(addr, reg_len), expected);
            }
        }
    }
}