#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use std::{
ops::{Add, Div, Mul, Sub},
slice::from_raw_parts,
};
use crate::{is_shuffle_arg, shuffle_mask, Check, True};
/// A 4-lane `f32` SIMD vector backed by a single SSE `__m128` register.
///
/// `#[repr(transparent)]` guarantees this struct has exactly the layout of
/// its one `__m128` field, so reinterpreting a `Vector` as the raw register
/// type (as `get` does) is sound.
#[repr(transparent)]
#[derive(Copy, Clone)]
pub struct Vector {
    // The four lanes; lane 0 is `x`, lane 3 is `w` (see `Vector::new`).
    data: __m128,
}
impl Add for Vector {
    type Output = Self;

    /// Lane-wise addition of two vectors (SSE `addps`).
    #[inline(always)]
    fn add(self, rhs: Self) -> Self {
        // SAFETY: `_mm_add_ps` is a plain SSE intrinsic with no
        // preconditions on the x86/x86_64 targets this file builds for.
        let sum = unsafe { _mm_add_ps(self.data, rhs.data) };
        Self { data: sum }
    }
}
impl Default for Vector {
    /// The zero vector: all four lanes are `0.0`.
    #[inline(always)]
    fn default() -> Self {
        // SAFETY: `_mm_setzero_ps` has no preconditions.
        let zero = unsafe { _mm_setzero_ps() };
        Self { data: zero }
    }
}
impl Div for Vector {
    type Output = Self;

    /// Lane-wise division of two vectors (SSE `divps`).
    #[inline(always)]
    fn div(self, rhs: Self) -> Self {
        // SAFETY: plain SSE intrinsic, no preconditions.
        let quotient = unsafe { _mm_div_ps(self.data, rhs.data) };
        Self { data: quotient }
    }
}
impl Div<f32> for Vector {
    type Output = Self;

    /// Divides every lane by the scalar `rhs`.
    #[inline(always)]
    fn div(self, rhs: f32) -> Self {
        // SAFETY: plain SSE intrinsics, no preconditions.
        unsafe {
            // Broadcast the scalar into all four lanes, then divide.
            let divisor = _mm_set1_ps(rhs);
            Self {
                data: _mm_div_ps(self.data, divisor),
            }
        }
    }
}
impl Mul for Vector {
    type Output = Self;

    /// Lane-wise multiplication of two vectors (SSE `mulps`).
    #[inline(always)]
    fn mul(self, rhs: Self) -> Self {
        // SAFETY: plain SSE intrinsic, no preconditions.
        let product = unsafe { _mm_mul_ps(self.data, rhs.data) };
        Self { data: product }
    }
}
impl Mul<f32> for Vector {
    type Output = Self;

    /// Multiplies every lane by the scalar `rhs`.
    #[inline(always)]
    fn mul(self, rhs: f32) -> Self {
        // SAFETY: plain SSE intrinsics, no preconditions.
        unsafe {
            // Broadcast the scalar into all four lanes, then multiply.
            let factor = _mm_set1_ps(rhs);
            Self {
                data: _mm_mul_ps(self.data, factor),
            }
        }
    }
}
impl PartialEq for Vector {
    /// True only when all four lanes compare equal.
    ///
    /// Uses IEEE-754 comparison semantics, so a `NaN` lane compares unequal
    /// to everything, including itself.
    #[inline(always)]
    fn eq(&self, other: &Self) -> bool {
        // SAFETY: plain SSE comparison/movemask intrinsics.
        unsafe {
            // `cmpeq_ps` sets each matching lane to all-ones; `movemask_ps`
            // gathers one sign bit per lane, so 0b1111 means every lane
            // matched (equivalent to the 16-byte epi8 movemask test).
            let mask = _mm_movemask_ps(_mm_cmpeq_ps(self.data, other.data));
            mask == 0xf
        }
    }
}
impl Sub for Vector {
    type Output = Self;

    /// Lane-wise subtraction of two vectors (SSE `subps`).
    #[inline(always)]
    fn sub(self, rhs: Self) -> Self {
        // SAFETY: plain SSE intrinsic, no preconditions.
        let difference = unsafe { _mm_sub_ps(self.data, rhs.data) };
        Self { data: difference }
    }
}
impl Vector {
    /// Builds a vector from four lanes: `x` → lane 0, `y` → lane 1,
    /// `z` → lane 2, `w` → lane 3.
    #[inline(always)]
    pub fn new(x: f32, y: f32, z: f32, w: f32) -> Self {
        Self {
            // `_mm_set_ps` takes its arguments highest-lane-first, hence
            // the reversed order.
            data: unsafe { _mm_set_ps(w, z, y, x) },
        }
    }
    /// Returns lane 0.
    #[inline(always)]
    pub fn x(self) -> f32 { unsafe { _mm_cvtss_f32(self.data) } }
    /// Returns lane 1 (broadcast lane 1 into lane 0, then extract lane 0).
    #[inline(always)]
    pub fn y(self) -> f32 {
        unsafe { _mm_cvtss_f32(_mm_shuffle_ps::<{ shuffle_mask(1, 1, 1, 1) }>(self.data, self.data)) }
    }
    /// Returns lane 2.
    #[inline(always)]
    pub fn z(self) -> f32 {
        unsafe { _mm_cvtss_f32(_mm_shuffle_ps::<{ shuffle_mask(2, 2, 2, 2) }>(self.data, self.data)) }
    }
    /// Returns lane 3.
    #[inline(always)]
    pub fn w(self) -> f32 {
        unsafe { _mm_cvtss_f32(_mm_shuffle_ps::<{ shuffle_mask(3, 3, 3, 3) }>(self.data, self.data)) }
    }
    /// Overwrites lane 0 with `val`, leaving the other lanes untouched.
    #[inline(always)]
    pub fn set_x(&mut self, val: f32) {
        unsafe {
            // `move_ss` copies lane 0 of the second operand into lane 0 of
            // the first, keeping lanes 1-3 of the first.
            self.data = _mm_move_ss(self.data, _mm_set_ss(val));
        }
    }
    /// Overwrites lane 1 with `val`, leaving the other lanes untouched.
    ///
    /// Lane trace (assuming `shuffle_mask` follows the `_MM_SHUFFLE`
    /// convention, highest lane first — consistent with `shuffle` below):
    /// `[x,y,z,w]` → move_ss → `[val,y,z,w]` → shuffle → `[val,val,z,w]`
    /// → move_ss with the original → `[x,val,z,w]`.
    #[inline(always)]
    pub fn set_y(&mut self, val: f32) {
        unsafe {
            let t = _mm_move_ss(self.data, _mm_set_ss(val));
            let t = _mm_shuffle_ps::<{ shuffle_mask(3, 2, 0, 0) }>(t, t);
            self.data = _mm_move_ss(t, self.data);
        }
    }
    /// Overwrites lane 2 with `val`, leaving the other lanes untouched.
    ///
    /// Lane trace: `[x,y,z,w]` → `[val,y,z,w]` → `[val,y,val,w]`
    /// → `[x,y,val,w]`.
    #[inline(always)]
    pub fn set_z(&mut self, val: f32) {
        unsafe {
            let t = _mm_move_ss(self.data, _mm_set_ss(val));
            let t = _mm_shuffle_ps::<{ shuffle_mask(3, 0, 1, 0) }>(t, t);
            self.data = _mm_move_ss(t, self.data);
        }
    }
    /// Overwrites lane 3 with `val`, leaving the other lanes untouched.
    ///
    /// Lane trace: `[x,y,z,w]` → `[val,y,z,w]` → `[val,y,z,val]`
    /// → `[x,y,z,val]`.
    #[inline(always)]
    pub fn set_w(&mut self, val: f32) {
        unsafe {
            let t = _mm_move_ss(self.data, _mm_set_ss(val));
            let t = _mm_shuffle_ps::<{ shuffle_mask(0, 2, 1, 0) }>(t, t);
            self.data = _mm_move_ss(t, self.data);
        }
    }
    /// Returns lane `idx` (0 = x, 1 = y, 2 = z, 3 = w).
    ///
    /// # Panics
    /// Panics if `idx >= 4`.
    #[inline(always)]
    pub fn get(self, idx: u8) -> f32 {
        assert!(idx < 4, "Indexed out of Vector bounds");
        // Spill the register into a stack array rather than reinterpreting
        // `&self` through a raw pointer cast: this is unambiguously sound
        // (no layout punning) and optimizes to the same few instructions.
        let mut lanes = [0.0f32; 4];
        // SAFETY: `lanes` provides 16 writable bytes and `_mm_storeu_ps`
        // has no alignment requirement.
        unsafe { _mm_storeu_ps(lanes.as_mut_ptr(), self.data) };
        lanes[idx as usize]
    }
    /// Rearranges lanes: result lane 0 = input lane `X`, lane 1 = `Y`,
    /// lane 2 = `Z`, lane 3 = `W`. The compile-time `Check` bound rejects
    /// lane indices outside `0..4`.
    #[inline(always)]
    pub fn shuffle<const X: u32, const Y: u32, const Z: u32, const W: u32>(self) -> Self
    where
        Check<{ is_shuffle_arg(X, Y, Z, W) }>: True,
        [(); shuffle_mask(W, Z, Y, X) as usize]:,
    {
        Self {
            // `shuffle_mask` takes the highest lane first, hence (W,Z,Y,X).
            data: unsafe { _mm_shuffle_ps::<{ shuffle_mask(W, Z, Y, X) }>(self.data, self.data) },
        }
    }
    /// Combines two vectors: result lanes 0-1 are lanes `X`/`Y` of `vec1`,
    /// result lanes 2-3 are lanes `Z`/`W` of `vec2` (the `shufps` operand
    /// rule: low half from the first source, high half from the second).
    #[inline(always)]
    pub fn shuffle_merge<const X: u32, const Y: u32, const Z: u32, const W: u32>(vec1: Vector, vec2: Vector) -> Self
    where
        Check<{ is_shuffle_arg(X, Y, Z, W) }>: True,
        [(); shuffle_mask(W, Z, Y, X) as usize]:,
    {
        Self {
            data: unsafe { _mm_shuffle_ps::<{ shuffle_mask(W, Z, Y, X) }>(vec1.data, vec2.data) },
        }
    }
    /// Lane-wise absolute value: clears each lane's IEEE-754 sign bit.
    #[inline(always)]
    pub fn abs(self) -> Self {
        unsafe {
            Self {
                // andnot(mask, x) computes `!mask & x`; SIGNBITS has only
                // the sign bits set, so they are zeroed here.
                data: _mm_andnot_ps(SIGNBITS.vec, self.data),
            }
        }
    }
    /// Horizontal sum of all four lanes: `x + y + z + w`.
    #[inline(always)]
    pub fn hsum(self) -> f32 {
        unsafe {
            // shuf = [y, x, w, z]; sum = [x+y, x+y, z+w, z+w]
            let shuf = _mm_shuffle_ps::<{ shuffle_mask(2, 3, 0, 1) }>(self.data, self.data);
            let sum = _mm_add_ps(self.data, shuf);
            // Bring the high pair [z+w, z+w] down into the low lanes …
            let shuf = _mm_movehl_ps(shuf, sum);
            // … and add: lane 0 now holds (x+y) + (z+w).
            let sum = _mm_add_ps(sum, shuf);
            _mm_cvtss_f32(sum)
        }
    }
    /// Lane-wise minimum (SSE `minps`; NaN handling follows the intrinsic,
    /// which returns the second operand's lane — not `f32::min`).
    #[inline(always)]
    pub fn min(lhs: Self, rhs: Self) -> Self {
        Self {
            data: unsafe { _mm_min_ps(lhs.data, rhs.data) },
        }
    }
    /// Lane-wise maximum (SSE `maxps`; same NaN caveat as `min`).
    #[inline(always)]
    pub fn max(lhs: Self, rhs: Self) -> Self {
        Self {
            data: unsafe { _mm_max_ps(lhs.data, rhs.data) },
        }
    }
    /// Horizontal pairwise add (SSE3 `haddps`):
    /// `[l0+l1, l2+l3, r0+r1, r2+r3]`.
    #[inline(always)]
    pub fn adj_add(lhs: Self, rhs: Self) -> Self {
        Self {
            data: unsafe { _mm_hadd_ps(lhs.data, rhs.data) },
        }
    }
    /// Horizontal pairwise subtract (SSE3 `hsubps`):
    /// `[l0-l1, l2-l3, r0-r1, r2-r3]`.
    #[inline(always)]
    pub fn adj_sub(lhs: Self, rhs: Self) -> Self {
        Self {
            data: unsafe { _mm_hsub_ps(lhs.data, rhs.data) },
        }
    }
    /// Alternating subtract/add (SSE3 `addsubps`):
    /// `[l0-r0, l1+r1, l2-r2, l3+r3]`.
    #[inline(always)]
    pub fn add_sub(lhs: Self, rhs: Self) -> Self {
        Self {
            data: unsafe { _mm_addsub_ps(lhs.data, rhs.data) },
        }
    }
}
/// Bit-punning helper: lets a `__m128` constant be written as four `u32`
/// bit patterns, since there is no const constructor for `__m128` itself.
union Bits {
    uints: [u32; 4],
    vec: __m128,
}
/// All four lanes with only the IEEE-754 sign bit (bit 31) set.
/// Used by `Vector::abs` to clear sign bits via `_mm_andnot_ps`.
const SIGNBITS: Bits = Bits {
    uints: [0x80000000, 0x80000000, 0x80000000, 0x80000000],
};