llml 0.2.3

Implementation of basic math data types with a high-level frontend and a low-level backend.
x86_use!();
use cfg_if::cfg_if;
use std::{ops::{Add, Sub, Mul, Div, Neg}, intrinsics::transmute};
use crate::vec::EucVecd2;
use super::{_mm_sum_ps, EucVecd4};

/// Four-lane `f32` vector stored in a single `__m128` SSE register.
///
/// `#[repr(transparent)]` gives the wrapper the same layout as the wrapped `__m128`.
#[repr(transparent)]
pub struct EucVecf4 (pub(crate) __m128);
// Macro defined elsewhere in the crate; presumably generates the element-wise arithmetic
// operator impls over `f32` lanes (e.g. the `Mul` that `dot` relies on) — TODO confirm.
impl_arith_sse!(EucVecf4, f32);

impl EucVecf4 {
    const DIV_MASK : __m128 = unsafe { *(&[u32::MAX, u32::MAX, u32::MAX, u32::MAX] as *const [u32;4] as *const __m128) };
    const ABS_MASK : __m128 = unsafe { *(&[i32::MAX, i32::MAX, i32::MAX, i32::MAX] as *const [i32;4] as *const __m128) };

    #[inline(always)]
    pub fn new (a: [f32;4]) -> Self {
        unsafe { Self(_mm_set_ps(a[3], a[2], a[1], a[0])) }
    }

    #[inline(always)]
    pub fn from_scal (x: f32) -> Self {
        unsafe { Self(_mm_set1_ps(x)) }
    }

    #[inline(always)]
    pub fn x (&self) -> f32 {
        unsafe { _mm_cvtss_f32(self.0) }
    }

    #[inline(always)]
    pub fn y (&self) -> f32 {
        unsafe { _mm_cvtss_f32(_mm_shuffle_ps(self.0, self.0, _MM_SHUFFLE(1, 1, 1, 1))) }
    }

    #[inline(always)]
    pub fn z (&self) -> f32 {
        unsafe { _mm_cvtss_f32(_mm_shuffle_ps(self.0, self.0, _MM_SHUFFLE(2, 2, 2, 2))) }
    }

    #[inline(always)]
    pub fn w (&self) -> f32 {
        unsafe { _mm_cvtss_f32(_mm_shuffle_ps(self.0, self.0, _MM_SHUFFLE(3, 3, 3, 3))) }
    }

    #[inline(always)]
    pub fn sum (self) -> f32 {
        unsafe { _mm_sum_ps(self.0) }
    }

    #[inline(always)]
    pub fn dot (self, rhs: Self) -> f32 {
        (self * rhs).sum()
    }

    #[inline(always)]
    pub fn norm (self) -> f32 {
        self.dot(self).sqrt()
    }

    #[inline(always)]
    pub fn abs (self) -> Self {
        unsafe { Self(_mm_and_ps(Self::ABS_MASK, self.0)) }
    }

    #[inline(always)]
    pub fn sqrt (self) -> Self {
        unsafe { Self(_mm_sqrt_ps(self.0)) }
    }

    #[inline(always)]
    pub fn sqrt_fast (self) -> Self {
        unsafe { Self(_mm_rcp_ps(_mm_rsqrt_ps(self.0))) }
    }
}

impl Into<[f32;4]> for EucVecf4 {
    #[inline(always)]
    fn into (self) -> [f32;4] {
        unsafe { transmute(self.0) }
    }
}

/// Widens the four `f32` lanes to `f64`.
///
/// Implemented as `From` rather than `Into`: the standard blanket impl still provides
/// `EucVecf4::into()`, and `EucVecd4::from(v)` becomes available as well.
#[cfg(target_feature = "sse2")]
impl From<EucVecf4> for EucVecd4 {
    #[inline(always)]
    fn from (vec: EucVecf4) -> Self {
        cfg_if! {
            if #[cfg(target_feature = "avx")] {
                // With AVX all four lanes are converted by one instruction into a 256-bit register.
                unsafe { EucVecd4(_mm256_cvtps_pd(vec.0)) }
            } else {
                unsafe {
                    // `_mm_cvtps_pd` converts only the two lowest lanes, so lanes 2 and 3
                    // are first moved down into positions 0 and 1 for the second half.
                    let upper = _mm_shuffle_ps(vec.0, vec.0, _MM_SHUFFLE(1, 0, 3, 2));
                    EucVecd4(
                        EucVecd2(_mm_cvtps_pd(vec.0)),
                        EucVecd2(_mm_cvtps_pd(upper))
                    )
                }
            }
        }
    }
}