use std::arch::x86_64::*;
use half::f16;
use crate::{
arch::{
emulated::Scalar,
x86_64::{
V3,
algorithms::__load_first_u16_of_16_bytes,
macros::{self, X86Default, X86LoadStore, X86Splat},
},
},
bitmask::BitMask,
constant::Const,
emulated::Emulated,
traits::SIMDVector,
};
// Declares `f16x8`: the rest of this file constructs it as `Self(__m128i)` and
// uses `Self::Underlying` / `Self::Mask`, so the type is a tuple wrapper around
// a 128-bit integer register.
// NOTE(review): the macro body is not visible here; the arguments presumably
// read as (type name, underlying register, mask type, scalar type, lane count,
// architecture token) — confirm against `x86_define_register!`.
macros::x86_define_register!(f16x8, __m128i, BitMask<8, V3>, f16, 8, V3);
// Supplies the default value for `f16x8` using `_mm_setzero_si128`.
// NOTE(review): presumably the default is an all-zero register and "sse2" names
// the target feature the intrinsic needs — confirm against `x86_define_default!`.
macros::x86_define_default!(f16x8, _mm_setzero_si128, "sse2");
impl X86Splat for f16x8 {
    /// Builds a vector whose eight 16-bit lanes all hold the bit pattern of `value`.
    ///
    /// The architecture token is not inspected; it only appears in the signature.
    #[inline(always)]
    fn x86_splat(_: V3, value: f16) -> Self {
        // `f16` exposes its encoding as `u16`; the intrinsic wants `i16`, so the
        // bits are reinterpreted (no numeric conversion takes place).
        let lane_bits = value.to_bits() as i16;

        // SAFETY: `_mm_set1_epi16` has no memory operands; it only requires that
        // the CPU supports the instruction set it belongs to.
        // NOTE(review): presumably holding a `V3` value proves that — confirm.
        let broadcast = unsafe { _mm_set1_epi16(lane_bits) };
        Self(broadcast)
    }
}
impl X86LoadStore for f16x8 {
    /// Reads a full vector from `ptr` with an unaligned 128-bit load.
    ///
    /// # Safety
    ///
    /// `_mm_loadu_si128` reads 16 bytes starting at `ptr`, so that whole range
    /// must be readable. No alignment is required by the intrinsic.
    #[inline(always)]
    unsafe fn load_simd(_: V3, ptr: *const f16) -> Self {
        let src: *const Self::Underlying = ptr.cast();
        // SAFETY: the caller guarantees the 16 bytes at `ptr` are readable.
        let raw = unsafe { _mm_loadu_si128(src) };
        Self(raw)
    }

    /// Masked load, delegated to the scalar `Emulated` implementation and then
    /// repacked into a register through `from_array`.
    ///
    /// # Safety
    ///
    /// The pointer and mask are forwarded unchanged, so the requirements are
    /// those of `Emulated::<f16, 8>::load_simd_masked_logical`.
    /// NOTE(review): presumably only lanes selected by `mask` are read — confirm
    /// against the emulated implementation.
    #[inline(always)]
    unsafe fn load_simd_masked_logical(arch: V3, ptr: *const f16, mask: Self::Mask) -> Self {
        // SAFETY: the caller upholds the contract of the emulated masked load,
        // which receives exactly the pointer and mask passed in here.
        let lanes = unsafe {
            Emulated::<f16, 8>::load_simd_masked_logical(Scalar, ptr, mask.as_scalar()).to_array()
        };
        Self::from_array(arch, lanes)
    }

    /// Partial load, delegated to `__load_first_u16_of_16_bytes` with the
    /// pointer viewed as `*const u16`.
    ///
    /// # Safety
    ///
    /// The requirements are those of `__load_first_u16_of_16_bytes`.
    /// NOTE(review): presumably the first `first` lanes are read and the rest
    /// are filled with a fixed value — confirm against the helper.
    #[inline(always)]
    unsafe fn load_simd_first(arch: V3, ptr: *const f16, first: usize) -> Self {
        let src: *const u16 = ptr.cast();
        // SAFETY: the caller upholds the helper's contract for `src` and `first`.
        let raw = unsafe { __load_first_u16_of_16_bytes(arch, src, first) };
        Self(raw)
    }

    /// Writes the full vector to `ptr` with an unaligned 128-bit store.
    ///
    /// # Safety
    ///
    /// `_mm_storeu_si128` writes 16 bytes starting at `ptr`, so that whole range
    /// must be writable. No alignment is required by the intrinsic.
    #[inline(always)]
    unsafe fn store_simd(self, ptr: *mut f16) {
        let dst: *mut Self::Underlying = ptr.cast();
        // SAFETY: the caller guarantees the 16 bytes at `ptr` are writable.
        unsafe {
            let raw = self.to_underlying();
            _mm_storeu_si128(dst, raw);
        }
    }

    /// Partial store, delegated to the emulated form of this vector.
    ///
    /// # Safety
    ///
    /// `ptr` and `first` are forwarded unchanged, so the requirements are those
    /// of the emulated `store_simd_first`.
    #[inline(always)]
    unsafe fn store_simd_first(self, ptr: *mut f16, first: usize) {
        // SAFETY: the caller upholds the contract of the emulated partial store.
        unsafe {
            self.emulated().store_simd_first(ptr, first);
        }
    }

    /// Masked store, delegated to the emulated form of this vector with the
    /// mask converted through `as_scalar`.
    ///
    /// # Safety
    ///
    /// `ptr` and the converted mask are forwarded unchanged, so the requirements
    /// are those of the emulated `store_simd_masked_logical`.
    #[inline(always)]
    unsafe fn store_simd_masked_logical(self, ptr: *mut f16, mask: Self::Mask) {
        // SAFETY: the caller upholds the contract of the emulated masked store.
        unsafe {
            self.emulated()
                .store_simd_masked_logical(ptr, mask.as_scalar());
        }
    }
}
/// Tests for `f16x8`, driven by the shared helpers in `test_utils`.
///
/// Each test returns without running anything when `V3::new_checked_uncached`
/// yields `None`, and every test is compiled out under Miri.
#[cfg(test)]
mod test_x86_f16 {
    use super::*;
    use crate::test_utils;

    /// Runs the shared load test suite against `f16x8`.
    #[cfg(not(miri))]
    #[test]
    fn test_load() {
        let Some(arch) = V3::new_checked_uncached() else {
            return;
        };
        test_utils::test_load_simd::<f16, 8, f16x8>(arch);
    }

    /// Runs the shared store test suite against `f16x8`.
    #[cfg(not(miri))]
    #[test]
    fn test_store() {
        let Some(arch) = V3::new_checked_uncached() else {
            return;
        };
        test_utils::test_store_simd::<f16, 8, f16x8>(arch);
    }

    /// Runs the shared splat test against `f16x8`.
    #[cfg(not(miri))]
    #[test]
    fn test_constructors() {
        let Some(arch) = V3::new_checked_uncached() else {
            return;
        };
        test_utils::ops::test_splat::<f16, 8, f16x8>(arch);
    }
}