use super::*;
impl f32x4 {
/// Bitwise "and-not" of two vectors.
///
/// The portable path computes `(!self) & rhs` with the type's own operators.
/// When SSE is enabled the work is forwarded to `andnot` on the wrapped
/// register instead.
#[inline]
#[must_use]
pub fn andnot(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.andnot(rhs.sse);
    Self { sse }
  } else {
    let inverted = !self;
    inverted & rhs
  }}
}
/// Lanewise NaN test, returned as a mask.
///
/// In the portable path a lane whose value is NaN becomes all-ones bits and
/// every other lane becomes `0.0`. With SSE the register is compared against
/// itself via `cmp_nan`.
#[inline]
#[must_use]
pub fn is_nan(self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_nan(self.sse);
    Self { sse }
  } else {
    let mask = |lane: f32| -> f32 {
      let bits = if lane.is_nan() { u32::max_value() } else { 0 };
      f32::from_bits(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    Self { arr: [mask(l0), mask(l1), mask(l2), mask(l3)] }
  }}
}
/// Lanewise "is not NaN" test, returned as a mask.
///
/// In the portable path a lane that is *not* NaN becomes all-ones bits and a
/// NaN lane becomes `0.0`. With SSE the register is compared against itself
/// via `cmp_ordinary`.
#[inline]
#[must_use]
pub fn is_ordinary(self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_ordinary(self.sse);
    Self { sse }
  } else {
    let mask = |lane: f32| -> f32 {
      let bits = if lane.is_nan() { 0 } else { u32::max_value() };
      f32::from_bits(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    Self { arr: [mask(l0), mask(l1), mask(l2), mask(l3)] }
  }}
}
/// Uses `self` as a mask to pick between `a` and `b`.
///
/// In the portable path every bit that is set in `self` is taken from `a`
/// and every clear bit is taken from `b`. With SSE4.1 the selection is
/// forwarded to `blend_var` on the wrapped registers.
///
/// NOTE(review): the two paths can only agree when each mask lane is all
/// ones or all zeros (as produced by the `cmp_*` methods) -- confirm that
/// `blend_var` is lane-granular.
#[inline]
#[must_use]
pub fn merge(self, a: Self, b: Self) -> Self {
  magic! {if #[cfg(target_feature="sse4.1")] {
    let sse = b.sse.blend_var(a.sse, self.sse);
    Self { sse }
  } else {
    let from_a = self & a;
    let from_b = self.andnot(b);
    from_a | from_b
  }}
}
/// Packs the sign bit of each lane into the low four bits of an `i32`.
///
/// In the portable path bit `i` of the result is set exactly when lane `i`
/// has its sign bit set (i.e. its bit pattern is negative when viewed as an
/// `i32`). With SSE the work is forwarded to `move_mask` on the register.
#[inline]
#[must_use]
pub fn move_mask(self) -> i32 {
  magic! {if #[cfg(target_feature="sse")] {
    self.sse.move_mask()
  } else {
    self.arr.iter().enumerate().fold(0_i32, |acc, (i, lane)| {
      // The top bit of the raw pattern is the sign bit.
      if lane.to_bits() >> 31 == 1 {
        acc | (1 << i)
      } else {
        acc
      }
    })
  }}
}
/// Lanewise `self == rhs`, returned as a mask.
///
/// In the portable path a lane is all-ones bits when the comparison holds
/// and all-zero bits otherwise. With SSE the work is forwarded to `cmp_eq`
/// on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_eq(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_eq(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l == r { core::u32::MAX } else { 0 };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise `self >= rhs`, returned as a mask.
///
/// In the portable path a lane is all-ones bits when the comparison holds
/// and all-zero bits otherwise. With SSE the work is forwarded to `cmp_ge`
/// on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_ge(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_ge(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l >= r { core::u32::MAX } else { 0 };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise `self > rhs`, returned as a mask.
///
/// In the portable path a lane is all-ones bits when the comparison holds
/// and all-zero bits otherwise. With SSE the work is forwarded to `cmp_gt`
/// on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_gt(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_gt(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l > r { core::u32::MAX } else { 0 };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise `self <= rhs`, returned as a mask.
///
/// In the portable path a lane is all-ones bits when the comparison holds
/// and all-zero bits otherwise. With SSE the work is forwarded to `cmp_le`
/// on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_le(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_le(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l <= r { core::u32::MAX } else { 0 };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise `self < rhs`, returned as a mask.
///
/// In the portable path a lane is all-ones bits when the comparison holds
/// and all-zero bits otherwise. With SSE the work is forwarded to `cmp_lt`
/// on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_lt(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_lt(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l < r { core::u32::MAX } else { 0 };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise "either operand is NaN" test, returned as a mask.
///
/// In the portable path a lane is all-ones bits when the lane of `self` or
/// the lane of `rhs` is NaN, and all-zero bits otherwise. With SSE the work
/// is forwarded to `cmp_nan` on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_nan(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_nan(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let unordered = l.is_nan() || r.is_nan();
      let bits = if unordered { core::u32::MAX } else { 0 };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise `self != rhs`, returned as a mask.
///
/// In the portable path a lane is all-ones bits when the comparison holds
/// (which includes any lane where an operand is NaN) and all-zero bits
/// otherwise. With SSE the work is forwarded to `cmp_ne` on the wrapped
/// registers.
#[inline]
#[must_use]
pub fn cmp_ne(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_ne(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l != r { core::u32::MAX } else { 0 };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise `!(self >= rhs)`, returned as a mask.
///
/// This is not the same as `self < rhs`: in the portable path a lane with a
/// NaN operand fails `>=` and therefore yields all-ones bits. Lanes where
/// `self >= rhs` holds yield all-zero bits. With SSE the work is forwarded
/// to `cmp_nge` on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_nge(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_nge(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l >= r { 0 } else { core::u32::MAX };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise `!(self > rhs)`, returned as a mask.
///
/// This is not the same as `self <= rhs`: in the portable path a lane with a
/// NaN operand fails `>` and therefore yields all-ones bits. Lanes where
/// `self > rhs` holds yield all-zero bits. With SSE the work is forwarded to
/// `cmp_ngt` on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_ngt(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_ngt(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l > r { 0 } else { core::u32::MAX };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise `!(self <= rhs)`, returned as a mask.
///
/// This is not the same as `self > rhs`: in the portable path a lane with a
/// NaN operand fails `<=` and therefore yields all-ones bits. Lanes where
/// `self <= rhs` holds yield all-zero bits. With SSE the work is forwarded
/// to `cmp_nle` on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_nle(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_nle(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l <= r { 0 } else { core::u32::MAX };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise `!(self < rhs)`, returned as a mask.
///
/// This is not the same as `self >= rhs`: in the portable path a lane with a
/// NaN operand fails `<` and therefore yields all-ones bits. Lanes where
/// `self < rhs` holds yield all-zero bits. With SSE the work is forwarded to
/// `cmp_nlt` on the wrapped registers.
#[inline]
#[must_use]
pub fn cmp_nlt(self, rhs: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.cmp_nlt(rhs.sse);
    Self { sse }
  } else {
    let mask = |l: f32, r: f32| -> f32 {
      let bits = if l < r { 0 } else { core::u32::MAX };
      cast::<u32, f32>(bits)
    };
    let [l0, l1, l2, l3] = self.arr;
    let [r0, r1, r2, r3] = rhs.arr;
    Self { arr: [
      mask(l0, r0),
      mask(l1, r1),
      mask(l2, r2),
      mask(l3, r3),
    ] }
  }}
}
/// Lanewise ceiling.
///
/// Dispatches on the available target features: `ceil` on the register with
/// SSE4.1, `ceil_sse2` on the register with only SSE2, and otherwise
/// `f32::ceil` applied to each of the four lanes.
#[inline]
#[must_use]
pub fn ceil(self) -> Self {
  magic! {if #[cfg(target_feature="sse4.1")] {
    let sse = self.sse.ceil();
    Self { sse }
  } else if #[cfg(target_feature="sse2")] {
    let sse = self.sse.ceil_sse2();
    Self { sse }
  } else {
    let [l0, l1, l2, l3]: [f32; 4] = cast(self);
    cast([l0.ceil(), l1.ceil(), l2.ceil(), l3.ceil()])
  }}
}
/// Lanewise floor.
///
/// Dispatches on the available target features: `floor` on the register
/// with SSE4.1, `floor_sse2` on the register with only SSE2, and otherwise
/// `f32::floor` applied to each of the four lanes.
#[inline]
#[must_use]
pub fn floor(self) -> Self {
  magic! {if #[cfg(target_feature="sse4.1")] {
    let sse = self.sse.floor();
    Self { sse }
  } else if #[cfg(target_feature="sse2")] {
    let sse = self.sse.floor_sse2();
    Self { sse }
  } else {
    let [l0, l1, l2, l3]: [f32; 4] = cast(self);
    cast([l0.floor(), l1.floor(), l2.floor(), l3.floor()])
  }}
}
/// Lanewise absolute value, computed by bitwise-ANDing with
/// `Self::ALL_EXCEPT_SIGN`.
#[inline(always)]
#[must_use]
pub fn abs(self) -> Self {
  let keep_mask = Self::ALL_EXCEPT_SIGN;
  self & keep_mask
}
/// Lanewise cosine: the second element of [`sin_cos`](Self::sin_cos).
///
/// If the sine is needed too, call `sin_cos` once instead of calling `sin`
/// and `cos` separately.
#[inline]
#[must_use]
pub fn cos(self) -> Self {
  let (_, cosine) = self.sin_cos();
  cosine
}
/// Lanewise rounding to the nearest integral value, kept as `f32`.
///
/// Dispatch by target feature:
/// * SSE4.1: `round_nearest` on the register.
/// * SSE2 only: round-trip through `i32` lanes, with fix-ups (see below).
/// * `toolchain_nightly` feature: the `roundf32` intrinsic per lane.
/// * otherwise: `f32::round` per lane.
///
/// The SSE2 path cannot represent every input in an `i32`, so a plain
/// round-trip would corrupt NaN, infinities and very large magnitudes, and
/// would turn small negative inputs into `+0.0`. Two fix-ups handle this:
/// lanes that are not below 2^23 in magnitude (already integral, or NaN) are
/// passed through untouched, and the input's sign bit is OR-ed back onto the
/// rounded value.
///
/// NOTE(review): tie-breaking (x.5) is whatever each backend does; the scalar
/// `f32::round` path rounds ties away from zero, the register paths may not.
#[inline]
#[must_use]
pub fn round(self) -> Self {
  magic! {if #[cfg(target_feature = "sse4.1")] {
    Self { sse: self.sse.round_nearest() }
  } else if #[cfg(target_feature="sse2")] {
    let via_i32 = Self { sse: self.sse.round_i32().round_f32() };
    // Rounding never flips a sign, so OR-ing the input's sign bit only
    // matters when the result is zero: it keeps `-0.0` for inputs such as
    // `-0.3` instead of producing `+0.0`.
    let via_i32 = via_i32 | (self & Self::NEGATIVE_ZERO);
    // Every f32 with |x| >= 2^23 has no fractional bits, so it is already
    // rounded. `cmp_nlt` ("not less than") is also true for NaN lanes, so
    // NaN and infinities are passed through instead of going through i32.
    let already_integral = self.abs().cmp_nlt(Self::from(8_388_608.0));
    already_integral.merge(self, via_i32)
  } else if #[cfg(feature = "toolchain_nightly")] {
    use core::intrinsics::roundf32;
    let a: [f32; 4] = cast(self);
    // SAFETY: `roundf32` is a pure floating point intrinsic that is defined
    // for every `f32` bit pattern.
    cast(unsafe {
      [roundf32(a[0]), roundf32(a[1]), roundf32(a[2]), roundf32(a[3])]
    })
  } else {
    let a: [f32; 4] = cast(self);
    cast([a[0].round(), a[1].round(), a[2].round(), a[3].round()])
  }}
}
/// Lanewise sine: the first element of [`sin_cos`](Self::sin_cos).
///
/// If the cosine is needed too, call `sin_cos` once instead of calling `sin`
/// and `cos` separately.
#[inline]
#[must_use]
pub fn sin(self) -> Self {
  let (sine, _) = self.sin_cos();
  sine
}
/// Lanewise multiply-add: `(self * b) + c`.
///
/// With the `fma` target feature this forwards to `fmadd` on the wrapped
/// registers; otherwise it is a separate multiply followed by an add.
#[inline]
#[must_use]
pub fn mul_add(self, b: Self, c: Self) -> Self {
  magic! {if #[cfg(target_feature = "fma")] {
    let sse = self.sse.fmadd(b.sse, c.sse);
    Self { sse }
  } else {
    let product = self * b;
    product + c
  }}
}
/// Lanewise negated multiply-add: `c - (self * b)`.
///
/// With the `fma` target feature this forwards to `fnmadd` on the wrapped
/// registers; otherwise it is a separate multiply followed by a subtract.
#[inline]
#[must_use]
pub fn negated_mul_add(self, b: Self, c: Self) -> Self {
  magic! {if #[cfg(target_feature = "fma")] {
    let sse = self.sse.fnmadd(b.sse, c.sse);
    Self { sse }
  } else {
    let product = self * b;
    c - product
  }}
}
/// Lanewise reciprocal.
///
/// The portable path computes `1.0 / self`. With SSE the work is forwarded
/// to `reciprocal` on the wrapped register.
///
/// NOTE(review): hardware reciprocal instructions are commonly only an
/// approximation -- confirm the precision of the SSE path before relying on
/// the two paths matching bit-for-bit.
#[inline]
#[must_use]
pub fn recip(self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.reciprocal();
    Self { sse }
  } else {
    let one = f32x4::from(1.0);
    one / self
  }}
}
/// Lanewise maximum of `self` and `b`.
///
/// With SSE this forwards to `max` on the wrapped registers; otherwise
/// `f32::max` is applied to each pair of lanes.
///
/// NOTE(review): `f32::max` returns the non-NaN operand when exactly one
/// side is NaN; confirm whether the SSE path treats NaN lanes the same way.
#[inline]
#[must_use]
pub fn max(self, b: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.max(b.sse);
    Self { sse }
  } else {
    let [l0, l1, l2, l3]: [f32; 4] = cast(self);
    let [r0, r1, r2, r3]: [f32; 4] = cast(b);
    cast([l0.max(r0), l1.max(r1), l2.max(r2), l3.max(r3)])
  }}
}
/// Lanewise minimum of `self` and `b`.
///
/// With SSE this forwards to `min` on the wrapped registers; otherwise
/// `f32::min` is applied to each pair of lanes.
///
/// NOTE(review): `f32::min` returns the non-NaN operand when exactly one
/// side is NaN; confirm whether the SSE path treats NaN lanes the same way.
#[inline]
#[must_use]
pub fn min(self, b: Self) -> Self {
  magic! {if #[cfg(target_feature="sse")] {
    let sse = self.sse.min(b.sse);
    Self { sse }
  } else {
    let [l0, l1, l2, l3]: [f32; 4] = cast(self);
    let [r0, r1, r2, r3]: [f32; 4] = cast(b);
    cast([l0.min(r0), l1.min(r1), l2.min(r2), l3.min(r3)])
  }}
}
/// Rounds each lane to the nearest integer and returns the result as
/// `i32x4`.
///
/// With SSE2 this forwards to `round_i32` on the wrapped register. The gate
/// is `sse2` (not `sse`) to match `round` and `cast_i32x4`, which only use
/// the integer register operations when SSE2 is available.
///
/// The portable path first rounds with [`round`](Self::round) and only then
/// converts with `as i32`. Converting directly would truncate toward zero
/// (`1.7` would become `1`), which is not what the method name promises and
/// not what the register path does.
///
/// NOTE(review): out-of-range and NaN lanes follow Rust's `as` conversion
/// in the portable path; the register path may produce a different sentinel
/// for those lanes -- confirm if callers depend on it.
#[inline]
#[must_use]
pub fn round_i32(self) -> i32x4 {
  magic! {if #[cfg(target_feature="sse2")] {
    i32x4 { sse: self.sse.round_i32() }
  } else {
    // `cast` is used for lane access (as in `ceil`/`floor`) so this branch
    // does not depend on which storage field the vector types have.
    let [l0, l1, l2, l3]: [f32; 4] = cast(self.round());
    cast([l0 as i32, l1 as i32, l2 as i32, l3 as i32])
  }}
}
/// Lanewise finiteness test, returned as a mask.
///
/// A lane is considered finite when its exponent field is not all ones,
/// i.e. it is neither an infinity nor a NaN. Finite lanes become all-ones
/// bits and the others become all-zero bits.
///
/// Both paths use the same trick: shift the bit pattern left by one to drop
/// the sign bit, which moves the 8 exponent bits to the top byte, and then
/// compare that byte against `0xFF`.
///
/// The portable path goes through `cast` (as `ceil`/`floor` do) rather than
/// the `arr` field, because this branch is selected whenever SSE2 is
/// missing, which includes builds that have SSE only.
// The lints below are silenced for the hex mask literal, the deliberate
// `u32 -> i32` reinterpretation of that mask, and the `u`/`i` suffixed
// constant names.
#[allow(clippy::unreadable_literal)]
#[allow(clippy::cast_possible_wrap)]
#[allow(bad_style)]
#[inline]
#[must_use]
pub fn is_finite(self) -> f32x4 {
  const EXPONENT_MASKu: u32 = 0xFF000000_u32;
  const EXPONENT_MASKi: i32 = EXPONENT_MASKu as i32;
  magic! {if #[cfg(target_feature="sse2")] {
    let raw = self.sse.cast_m128i();
    // NOTE(review): if `shift_left_i32` reads its count from the low 64 bits
    // of the operand (as `_mm_sll_epi32` does), a splat of `1` is not a
    // count of one -- confirm against the `m128i` API.
    let without_sign = raw.shift_left_i32(m128i::splat_i32(1));
    // The method call binds tighter than `!`, so this is "not equal".
    let finite = !(without_sign & m128i::splat_i32(EXPONENT_MASKi))
      .cmp_eq_i32(m128i::splat_i32(EXPONENT_MASKi));
    Self { sse: finite.cast_m128() }
  } else {
    let mask = |lane: f32| -> f32 {
      let without_sign = lane.to_bits() << 1;
      let finite = (without_sign & EXPONENT_MASKu) != EXPONENT_MASKu;
      f32::from_bits(if finite { u32::max_value() } else { 0 })
    };
    let [l0, l1, l2, l3]: [f32; 4] = cast(self);
    cast([mask(l0), mask(l1), mask(l2), mask(l3)])
  }}
}
/// Reinterprets the vector as an `i32x4` without converting lane values.
///
/// With SSE2 this wraps `cast_m128i` on the register; otherwise it uses the
/// generic `cast` helper.
#[inline(always)]
#[must_use]
pub fn cast_i32x4(self) -> i32x4 {
  magic! {if #[cfg(target_feature="sse2")] {
    let sse = self.sse.cast_m128i();
    i32x4 { sse }
  } else {
    cast::<f32x4, i32x4>(self)
  }}
}
/// XORs the bits of `b` selected by `Self::NEGATIVE_ZERO` into `self`.
///
/// Despite the name this is a conditional sign *flip*, not
/// `f32::copysign`: the result's sign is `sign(self) XOR sign(b)`. The two
/// agree only when `self` is non-negative. `sin_cos` calls this on values
/// that may already be negative, so the XOR behaviour is load-bearing.
///
/// NOTE(review): assumes `NEGATIVE_ZERO` has only the sign bit set in each
/// lane -- its definition is not in this file.
#[inline]
#[must_use]
pub fn copysign(self, b: Self) -> Self {
  let sign_of_b = b & Self::NEGATIVE_ZERO;
  self ^ sign_of_b
}
/// Lanewise clamp: applies `max(min)` first and then `min(max)`.
///
/// No check is made that `min <= max`; because the upper bound is applied
/// last, it wins in lanes where the bounds are inverted.
#[inline]
#[must_use]
pub fn clamp(self, min: Self, max: Self) -> Self {
  let raised = self.max(min);
  raised.min(max)
}
/// Lanewise sign.
///
/// NaN lanes yield `f32x4::NAN`. Every other lane yields `Self::ONE` with
/// its sign combined with the lane's sign via [`copysign`](Self::copysign).
#[inline]
#[must_use]
pub fn signum(self) -> Self {
  let nan_lanes = self.is_nan();
  let signed_one = Self::ONE.copysign(self);
  nan_lanes.merge(f32x4::NAN, signed_one)
}
/// Lanewise tangent, computed as sine divided by cosine from a single
/// [`sin_cos`](Self::sin_cos) call.
#[inline]
#[must_use]
pub fn tan(self) -> Self {
  let (sine, cosine) = self.sin_cos();
  sine / cosine
}
/// Evaluates the quadratic `c0 + c1*x + c2*x^2` lanewise, with `x = self`.
///
/// Built from two `mul_add` calls: `x*c1 + c0` for the low terms, then
/// `x^2 * c2` added on top.
#[inline(always)]
#[must_use]
fn polynomial_2(self, c0: Self, c1: Self, c2: Self) -> Self {
  let squared = self * self;
  let low_terms = self.mul_add(c1, c0);
  squared.mul_add(c2, low_terms)
}
/// Computes the sine and cosine of every lane at once.
///
/// Returns `(sin, cos)`. The input is reduced by a multiple of pi/2, two
/// short polynomials approximate sine and cosine on the reduced argument,
/// and the low bits of the multiple decide which polynomial and which sign
/// belong to which output.
///
/// Lanes whose multiple exceeds `0x2000000` (while the input is still
/// finite) are treated as overflow and yield `sin = 0`, `cos = 1`.
#[allow(clippy::unreadable_literal)]
#[allow(clippy::excessive_precision)]
#[allow(clippy::many_single_char_names)]
#[allow(bad_style)]
#[allow(clippy::missing_inline_in_public_items)]
#[must_use]
pub fn sin_cos(self) -> (Self, Self) {
// Three-part split of the reduction constant; each literal is doubled, and
// together they add up to approximately pi/2. They are subtracted one after
// another below.
const_f32_as_f32x4!(DP1F, 0.78515625_f32 * 2.0);
const_f32_as_f32x4!(DP2F, 2.4187564849853515625E-4_f32 * 2.0);
const_f32_as_f32x4!(DP3F, 3.77489497744594108E-8_f32 * 2.0);
// Polynomial coefficients for the sine approximation.
const_f32_as_f32x4!(P0sinf, -1.6666654611E-1);
const_f32_as_f32x4!(P1sinf, 8.3321608736E-3);
const_f32_as_f32x4!(P2sinf, -1.9515295891E-4);
// Polynomial coefficients for the cosine approximation.
const_f32_as_f32x4!(P0cosf, 4.166664568298827E-2);
const_f32_as_f32x4!(P1cosf, -1.388731625493765E-3);
const_f32_as_f32x4!(P2cosf, 2.443315711809948E-5);
// Work on |x|; the sign of the input is re-applied to the sine at the end.
let xa = self.abs();
// y = nearest whole multiple of pi/2 contained in |x| (as a float),
// q = the same multiple as an integer.
let y = (xa * (f32x4::from(2.0) / Self::PI)).round();
let q: i32x4 = y.round_i32();
// Reduced argument: x = xa - y*DP1F - y*DP2F - y*DP3F, innermost first.
let x = y.negated_mul_add(
DP3F,
y.negated_mul_add(DP2F, y.negated_mul_add(DP1F, xa)),
);
let x2 = x * x;
// s = x + x^3 * P_sin(x^2)
let mut s = x2.polynomial_2(P0sinf, P1sinf, P2sinf) * (x * x2) + x;
// c = (ONE - HALF * x^2) + x^4 * P_cos(x^2)
let mut c = x2.polynomial_2(P0cosf, P1cosf, P2cosf) * (x2 * x2)
+ Self::HALF.negated_mul_add(x2, Self::ONE);
// Lanes where q is odd: the roles of the two polynomials are exchanged.
// (The method call binds tighter than `!`, so this is "not equal to zero".)
let swap = !(q & i32x4::ONE).cmp_eq(i32x4::ZERO);
// Overflow handling: q too large while the input is still finite.
const_i32_as_i32x4!(BIG_THRESHOLD, 0x2000000);
let mut overflow: f32x4 = q.cmp_gt(BIG_THRESHOLD).cast_f32x4();
overflow &= xa.is_finite();
s = f32x4::merge(overflow, f32x4::ZERO, s);
c = f32x4::merge(overflow, f32x4::ONE, c);
// Sine output: take `c` where `swap` is set, `s` elsewhere.
let mut sin1 = f32x4::merge(swap.cast_f32x4(), c, s);
// `q << 30` moves bit 1 of q into bit 31; XOR with the input's bits folds
// in the input's own sign. `copysign` then XORs that top bit into `sin1`
// (presumably only the sign bit is used, via `NEGATIVE_ZERO`).
let sign_sin: i32x4 = (q << 30) ^ self.cast_i32x4();
sin1 = sin1.copysign(sign_sin.cast_f32x4());
// Cosine output: take `s` where `swap` is set, `c` elsewhere.
let mut cos1 = f32x4::merge(swap.cast_f32x4(), s, c);
// Bit 1 of (q + 1), moved into bit 31, flips the sign of the cosine.
let sign_cos: i32x4 = ((q + i32x4::ONE) & i32x4::from(2)) << 30;
cos1 ^= sign_cos.cast_f32x4();
(sin1, cos1)
}
/// Converts each lane from radians to degrees.
///
/// Multiplies by the constant `RAD_TO_DEG_RATIO` (the literal below, which
/// is 180/pi written out to more digits than an `f32` can hold -- hence the
/// `excessive_precision` allowance).
#[inline]
#[allow(clippy::unreadable_literal)]
#[allow(clippy::excessive_precision)]
#[must_use]
pub fn to_degrees(self) -> Self {
const_f32_as_f32x4!(
pub RAD_TO_DEG_RATIO, 57.2957795130823208767981548141051703_f32
);
self * RAD_TO_DEG_RATIO
}
/// Converts each lane from degrees to radians.
///
/// Multiplies by the constant `DEG_TO_RAD_RATIO`, defined below as
/// `core::f32::consts::PI / 180.0`.
#[inline]
#[must_use]
pub fn to_radians(self) -> Self {
const_f32_as_f32x4!(
pub DEG_TO_RAD_RATIO, core::f32::consts::PI / 180.0_f32
);
self * DEG_TO_RAD_RATIO
}
/// Lanewise fractional part, computed as `self - self.trunc()`.
#[inline]
#[must_use]
pub fn fract(self) -> Self {
  let whole = self.trunc();
  self - whole
}
/// Lanewise square root.
///
/// Dispatch by feature: `sqrt` on the register with SSE, the `sqrtf32`
/// intrinsic per lane with the `toolchain_nightly` feature, and otherwise
/// `f32::sqrt` per lane.
#[inline]
#[must_use]
pub fn sqrt(self) -> Self {
  magic! { if #[cfg(target_feature = "sse")] {
    let sse = self.sse.sqrt();
    Self { sse }
  } else if #[cfg(feature = "toolchain_nightly")] {
    use core::intrinsics::sqrtf32;
    let [l0, l1, l2, l3]: [f32; 4] = cast(self);
    cast(unsafe {
      [sqrtf32(l0), sqrtf32(l1), sqrtf32(l2), sqrtf32(l3)]
    })
  } else {
    let [l0, l1, l2, l3]: [f32; 4] = cast(self);
    cast([l0.sqrt(), l1.sqrt(), l2.sqrt(), l3.sqrt()])
  }}
}
}