use crate::{pessimize_asm_values, pessimize_copy};
#[allow(unused)]
#[cfg(target_arch = "x86")]
use core::arch::x86 as target_arch;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64 as target_arch;
use target_arch::CpuidResult;
#[cfg(any(target_feature = "sse", doc))]
use target_arch::__m128;
#[cfg(any(target_feature = "avx2", doc))]
use target_arch::__m256i;
#[cfg(any(target_feature = "sse2", doc))]
use target_arch::{__m128d, __m128i};
#[cfg(any(target_feature = "avx", doc))]
use target_arch::{__m256, __m256d};
#[cfg(any(target_feature = "avx512f", doc))]
use target_arch::{__m512, __m512d, __m512i};
// Integer types that are available on both x86 and x86_64: 8-bit integers
// use the `reg_byte` register class, wider ones (up to 32 bits and
// pointer-sized) use the `reg` register class.
pessimize_asm_values!(
    allow(missing_docs)
    {
        reg_byte: (i8, u8),
        reg: (i16, u16, i32, u32, isize, usize)
    }
);

// 64-bit integers use the `reg` register class on x86_64...
#[cfg(target_arch = "x86_64")]
pessimize_asm_values!(
    allow(missing_docs)
    {
        reg: (i64, u64)
    }
);

// ...while on 32-bit x86 they use the `xmm_reg` register class instead,
// which is only done when SSE2 is enabled (or when building documentation).
#[cfg(all(target_arch = "x86", any(target_feature = "sse2", doc)))]
pessimize_asm_values!(
    doc(cfg(target_feature = "sse2"))
    {
        xmm_reg: (i64, u64)
    }
);
// `f32` always gets an implementation: through the `xmm_reg` register class
// when SSE is enabled (or when building documentation)...
#[cfg(any(target_feature = "sse", doc))]
pessimize_asm_values!(
    allow(missing_docs)
    {
        xmm_reg: (f32)
    }
);

// ...and through the `reg` register class otherwise.
#[cfg(all(not(target_feature = "sse"), not(doc)))]
pessimize_asm_values!(
    allow(missing_docs)
    {
        reg: (f32)
    }
);

// `f64` is only handled here when SSE2 is enabled (or when building
// documentation), in which case it uses the `xmm_reg` register class. The
// `doc(cfg(...))` annotation is only requested on 32-bit x86.
#[cfg(any(target_feature = "sse2", doc))]
pessimize_asm_values!(
    cfg_attr(target_arch = "x86", doc(cfg(target_feature = "sse2")))
    {
        xmm_reg: (f64)
    }
);
// 128-bit single-precision vectors (SSE), via the `xmm_reg` register class
#[cfg(any(target_feature = "sse", doc))]
pessimize_asm_values!(
    cfg_attr(target_arch = "x86", doc(cfg(target_feature = "sse")))
    {
        xmm_reg: (__m128)
    }
);

// 128-bit double-precision and integer vectors (SSE2), via `xmm_reg`
#[cfg(any(target_feature = "sse2", doc))]
pessimize_asm_values!(
    cfg_attr(target_arch = "x86", doc(cfg(target_feature = "sse2")))
    {
        xmm_reg: (__m128d, __m128i)
    }
);

// 256-bit floating-point vectors (AVX), via the `ymm_reg` register class
#[cfg(any(target_feature = "avx", doc))]
pessimize_asm_values!(
    doc(cfg(target_feature = "avx"))
    {
        ymm_reg: (__m256, __m256d)
    }
);

// 256-bit integer vectors (AVX2), via `ymm_reg`
#[cfg(any(target_feature = "avx2", doc))]
pessimize_asm_values!(
    doc(cfg(target_feature = "avx2"))
    {
        ymm_reg: (__m256i)
    }
);
/// `Pessimize` support that is specific to AVX-512: 512-bit vectors, bfloat16
/// vectors and mask registers
#[cfg(any(target_feature = "avx512f", doc))]
#[cfg_attr(feature = "nightly", doc(cfg(target_feature = "avx512f")))]
pub mod avx512 {
    use super::*;
    use crate::Pessimize;
    use core::arch::asm;
    #[cfg(any(target_feature = "avx512bf16", doc))]
    use target_arch::__m512bh;
    #[cfg(any(all(target_feature = "avx512vl", target_feature = "avx512bf16"), doc))]
    use target_arch::{__m128bh, __m256bh};

    // 512-bit vectors, via the `zmm_reg` register class
    pessimize_asm_values!(
        doc(cfg(target_feature = "avx512f"))
        {
            zmm_reg: (__m512, __m512d, __m512i)
        }
    );

    // 512-bit bfloat16 vectors additionally require avx512bf16
    #[cfg(any(target_feature = "avx512bf16", doc))]
    pessimize_asm_values!(
        doc(cfg(target_feature = "avx512bf16"))
        {
            zmm_reg: (__m512bh)
        }
    );

    // 128-bit and 256-bit bfloat16 vectors require both avx512vl and
    // avx512bf16, and use the narrower `xmm_reg` / `ymm_reg` register classes
    #[cfg(any(all(target_feature = "avx512vl", target_feature = "avx512bf16"), doc))]
    pessimize_asm_values!(
        doc(cfg(all(
            target_feature = "avx512vl",
            target_feature = "avx512bf16"
        )))
        { xmm_reg: (__m128bh), ymm_reg: (__m256bh) }
    );

    /// Integer bitmask that is pessimized through an AVX-512 mask register
    /// (the `kreg` inline assembly register class)
    ///
    /// [`Pessimize`] is implemented for 8-bit and 16-bit integer payloads
    /// whenever this module is available, and for 32-bit and 64-bit integer
    /// payloads when the `avx512bw` target feature is enabled (or when
    /// building documentation).
    #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
    pub struct Mask<T>(pub T);

    // Implement `Pessimize` for `Mask<T>`, for each integer type `T` in the
    // braced list. The leading meta item is attached to every generated impl
    // through `cfg_attr` when the "nightly" feature is enabled.
    macro_rules! pessimize_mask {
        (
            $doc_cfg:meta
            { $($mask_impl:ty),* }
        ) => {
            $(
                #[cfg_attr(feature = "nightly", $doc_cfg)]
                unsafe impl Pessimize for Mask<$mask_impl> {
                    #[inline]
                    fn hide(mut self) -> Self {
                        // SAFETY: The assembly template is only a comment that
                        //         names the operand, so no instruction is
                        //         emitted and the `preserves_flags`, `nostack`
                        //         and `nomem` options hold trivially.
                        unsafe {
                            asm!("/* {0} */", inout(kreg) self.0, options(preserves_flags, nostack, nomem));
                        }
                        self
                    }
                    #[inline]
                    fn assume_read(&self) {
                        // SAFETY: Same comment-only template as in `hide()`,
                        //         with the operand used as a pure input.
                        unsafe {
                            asm!("/* {0} */", in(kreg) self.0, options(preserves_flags, nostack, nomem))
                        }
                    }
                    // The two `assume_accessed` flavors simply forward to
                    // `assume_read()`.
                    #[inline]
                    fn assume_accessed(&mut self) {
                        Self::assume_read(self)
                    }
                    #[inline]
                    fn assume_accessed_imut(&self) {
                        Self::assume_read(self)
                    }
                }
            )*
        };
    }

    // 8-bit and 16-bit masks are available as soon as this module is
    pessimize_mask!(doc(cfg(target_feature = "avx512f")) { i8, u8, i16, u16 });

    // 32-bit and 64-bit masks additionally require avx512bw
    #[cfg(any(target_feature = "avx512bw", doc))]
    pessimize_mask!(
        doc(cfg(target_feature = "avx512bw"))
        { i32, u32, i64, u64 }
    );
}
// `Pessimize` support for the vector types of the `safe_arch` crate, each of
// which is paired with the `core::arch` vector type of matching width in a
// `pessimize_newtypes!` invocation. Compiled when the "safe_arch" feature is
// enabled, and also in test builds.
#[allow(unused)]
#[cfg(any(feature = "safe_arch", test))]
mod safe_arch_types {
    use super::*;
    use crate::pessimize_newtypes;
    #[cfg(any(target_feature = "sse", doc))]
    use safe_arch::m128;
    #[cfg(any(target_feature = "avx2", doc))]
    use safe_arch::m256i;
    #[cfg(any(target_feature = "sse2", doc))]
    use safe_arch::{m128d, m128i};
    #[cfg(any(target_feature = "avx", doc))]
    use safe_arch::{m256, m256d};
    #[cfg(any(target_feature = "avx512f", doc))]
    use safe_arch::{m512, m512d, m512i};

    // SSE: 128-bit single-precision vectors
    #[cfg(any(target_feature = "sse", doc))]
    pessimize_newtypes!(
        doc(cfg(all(feature = "safe_arch", target_feature = "sse")))
        {
            m128 { __m128 }
        }
    );

    // SSE2: 128-bit double-precision and integer vectors
    #[cfg(any(target_feature = "sse2", doc))]
    pessimize_newtypes!(
        doc(cfg(all(feature = "safe_arch", target_feature = "sse2")))
        {
            m128d { __m128d },
            m128i { __m128i }
        }
    );

    // AVX: 256-bit floating-point vectors
    #[cfg(any(target_feature = "avx", doc))]
    pessimize_newtypes!(
        doc(cfg(all(feature = "safe_arch", target_feature = "avx")))
        {
            m256 { __m256 },
            m256d { __m256d }
        }
    );

    // AVX2: 256-bit integer vectors
    #[cfg(any(target_feature = "avx2", doc))]
    pessimize_newtypes!(
        doc(cfg(all(feature = "safe_arch", target_feature = "avx2")))
        {
            m256i { __m256i }
        }
    );

    // AVX-512F: 512-bit vectors
    #[cfg(any(target_feature = "avx512f", doc))]
    pessimize_newtypes!(
        doc(cfg(all(feature = "safe_arch", target_feature = "avx512f")))
        {
            m512 { __m512 },
            m512d { __m512d },
            m512i { __m512i }
        }
    );
}
// `Pessimize` support for `core::simd` vectors and masks, which is only
// compiled when the "nightly" feature is enabled.
//
// Vectors are paired with the `core::arch` vector type of matching width
// through `pessimize_into_from!`, whereas masks are converted to and from an
// `avx512::Mask` bitmask.
#[allow(unused)]
#[cfg(feature = "nightly")]
mod portable_simd {
    use super::*;
    use crate::{pessimize_copy, pessimize_into_from};
    use core::simd::{Mask, Simd};
    #[cfg(any(target_feature = "avx512f", doc))]
    use target_arch::{__m512, __m512d, __m512i};

    // SSE: 128-bit single-precision vectors
    #[cfg(any(target_feature = "sse", doc))]
    pessimize_into_from!(
        doc(cfg(all(feature = "nightly", target_feature = "sse")))
        {
            __m128: (Simd<f32, 4>)
        }
    );

    // SSE2: 128-bit double-precision and integer vectors
    #[cfg(any(target_feature = "sse2", doc))]
    pessimize_into_from!(
        doc(cfg(all(feature = "nightly", target_feature = "sse2")))
        {
            __m128d: (Simd<f64, 2>),
            __m128i: (
                Simd<i8, 16>,
                Simd<u8, 16>,
                Simd<i16, 8>,
                Simd<u16, 8>,
                Simd<i32, 4>,
                Simd<u32, 4>,
                Simd<i64, 2>,
                Simd<u64, 2>
            )
        }
    );

    // AVX: 256-bit floating-point vectors
    #[cfg(any(target_feature = "avx", doc))]
    pessimize_into_from!(
        doc(cfg(all(feature = "nightly", target_feature = "avx")))
        {
            __m256: (Simd<f32, 8>),
            __m256d: (Simd<f64, 4>)
        }
    );

    // AVX2: 256-bit integer vectors
    #[cfg(any(target_feature = "avx2", doc))]
    pessimize_into_from!(
        doc(cfg(all(feature = "nightly", target_feature = "avx2")))
        {
            __m256i: (
                Simd<i8, 32>,
                Simd<u8, 32>,
                Simd<i16, 16>,
                Simd<u16, 16>,
                Simd<i32, 8>,
                Simd<u32, 8>,
                Simd<i64, 4>,
                Simd<u64, 4>
            )
        }
    );

    // AVX-512F: 512-bit vectors with 32-bit and 64-bit lanes
    #[cfg(any(target_feature = "avx512f", doc))]
    pessimize_into_from!(
        doc(cfg(all(feature = "nightly", target_feature = "avx512f")))
        {
            __m512: (Simd<f32, 16>),
            __m512d: (Simd<f64, 8>),
            __m512i: (Simd<i32, 16>, Simd<u32, 16>, Simd<i64, 8>, Simd<u64, 8>)
        }
    );

    // AVX-512BW: 512-bit vectors with 8-bit and 16-bit lanes
    #[cfg(any(all(target_feature = "avx512f", target_feature = "avx512bw"), doc))]
    pessimize_into_from!(
        doc(cfg(all(
            feature = "nightly",
            target_feature = "avx512bw"
        )))
        {
            __m512i: (Simd<i8, 64>, Simd<u8, 64>, Simd<i16, 32>, Simd<u16, 32>)
        }
    );

    // Set up `Pessimize` for each listed `core::simd::Mask` type by converting
    // it to and from an `avx512::Mask` bitmask.
    //
    // With avx512bw the bitmask returned by `to_bitmask()` is wrapped as-is in
    // an `avx512::Mask<u64>`. Without it, the bitmask is cast down to 16 bits
    // and wrapped in an `avx512::Mask<u16>`.
    #[cfg(any(target_feature = "avx512f", doc))]
    macro_rules! pessimize_portable_mask {
        (
            $meta:meta
            { $($simd_mask:ty),* }
        ) => {
            #[cfg(target_feature = "avx512bw")]
            pessimize_copy!(
                $meta
                {
                    $(
                        avx512::Mask<u64>: (
                            $simd_mask: (
                                |lanes: $simd_mask| avx512::Mask(lanes.to_bitmask()),
                                |bits: Self::Pessimized| Self::from_bitmask(bits.0)
                            )
                        )
                    ),*
                }
            );

            #[cfg(not(target_feature = "avx512bw"))]
            pessimize_copy!(
                $meta
                {
                    $(
                        avx512::Mask<u16>: (
                            $simd_mask: (
                                |lanes: $simd_mask| avx512::Mask(lanes.to_bitmask() as u16),
                                |bits: Self::Pessimized| Self::from_bitmask(bits.0 as _)
                            )
                        )
                    ),*
                }
            );
        };
    }

    // AVX-512F: masks of 512-bit vectors with 32-bit and 64-bit lanes
    #[cfg(any(target_feature = "avx512f", doc))]
    pessimize_portable_mask!(
        doc(cfg(all(feature = "nightly", target_feature = "avx512f")))
        { Mask<i32, 16>, Mask<i64, 8> }
    );

    // AVX-512BW: masks of 512-bit vectors with 8-bit and 16-bit lanes
    #[cfg(any(target_feature = "avx512bw", doc))]
    pessimize_portable_mask!(
        doc(cfg(all(
            feature = "nightly",
            target_feature = "avx512bw"
        )))
        { Mask<i8, 64>, Mask<i16, 32> }
    );

    // AVX-512VL: masks with at most 16 lanes
    #[cfg(any(target_feature = "avx512vl", doc))]
    pessimize_portable_mask!(
        doc(cfg(all(
            feature = "nightly",
            target_feature = "avx512vl"
        )))
        {
            Mask<i8, 16>,
            Mask<i16, 8>, Mask<i16, 16>,
            Mask<i32, 4>, Mask<i32, 8>,
            Mask<i64, 2>, Mask<i64, 4>
        }
    );

    // AVX-512BW + AVX-512VL: the 32-lane mask of 8-bit elements
    #[cfg(any(all(target_feature = "avx512bw", target_feature = "avx512vl"), doc))]
    pessimize_portable_mask!(
        doc(cfg(all(
            feature = "nightly",
            target_feature = "avx512bw",
            target_feature = "avx512vl"
        )))
        { Mask<i8, 32> }
    );
}
// `CpuidResult` is handled by converting it to and from a tuple of its four
// `u32` fields, in (eax, ebx, ecx, edx) order.
pessimize_copy!(
    allow(missing_docs)
    {
        (u32, u32, u32, u32): (
            CpuidResult: (
                |Self { eax: a, ebx: b, ecx: c, edx: d }| (a, b, c, d),
                |(a, b, c, d)| Self { eax: a, ebx: b, ecx: c, edx: d }
            )
        )
    }
);
// Tests of the x86-specific `Pessimize` implementations. Each test is gated on
// the target feature it exercises; the `*_optim` variants are `#[ignore]`d and
// therefore only run on explicit request.
#[allow(unused)]
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "nightly")]
    use crate::tests::test_portable_simd;
    use crate::{
        tests::{test_simd, test_unoptimized_value, test_unoptimized_value_type, test_value},
        Pessimize,
    };
    #[cfg(feature = "nightly")]
    use std::{
        fmt::Debug,
        simd::{Mask, MaskElement, Simd},
    };

    /// Run `test_simd` on a portable SIMD mask with `N` lanes, using `false`
    /// and `true` as the two sample values
    #[cfg(feature = "nightly")]
    fn test_portable_simd_mask<T, const N: usize>()
    where
        T: Debug + MaskElement + PartialEq + Unpin,
        Mask<T, N>: Pessimize,
    {
        test_simd::<bool, N, Mask<T, N>>(false, true)
    }

    // Run `test_portable_simd` on each (scalar type, lane count) pair, with
    // the scalar type's MIN and MAX as sample values
    #[cfg(feature = "nightly")]
    macro_rules! portable_simd_tests {
        ( $( ( $scalar:ty, $width:expr ) ),* ) => {
            $( test_portable_simd::<$scalar, $width>(<$scalar>::MIN, <$scalar>::MAX); )*
        }
    }

    // Run `test_portable_simd_mask` on each (element type, lane count) pair
    #[cfg(feature = "nightly")]
    macro_rules! portable_mask_tests {
        ( $( ( $scalar:ty, $width:expr ) ),* ) => {
            $( test_portable_simd_mask::<$scalar, $width>(); )*
        }
    }

    // Run `test_unoptimized_value_type` on the `Simd` type of each pair
    #[cfg(feature = "nightly")]
    macro_rules! portable_simd_tests_optim {
        ( $( ( $scalar:ty, $width:expr ) ),* ) => {
            $( test_unoptimized_value_type::<Simd<$scalar, $width>>(); )*
        }
    }

    // Run `test_unoptimized_value_type` on the `Mask` type of each pair
    #[cfg(feature = "nightly")]
    macro_rules! portable_mask_tests_optim {
        ( $( ( $scalar:ty, $width:expr ) ),* ) => {
            $( test_unoptimized_value_type::<Mask<$scalar, $width>>(); )*
        }
    }

    // --- SSE ---

    #[cfg(target_feature = "sse")]
    #[test]
    fn sse() {
        use safe_arch::m128;
        test_simd::<f32, 4, m128>(f32::MIN, f32::MAX);

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests!((f32, 4));
            #[cfg(target_feature = "avx512vl")]
            portable_mask_tests!((i32, 4));
        }
    }

    #[cfg(target_feature = "sse")]
    #[test]
    #[ignore]
    fn sse_optim() {
        use safe_arch::m128;
        test_unoptimized_value_type::<m128>();

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests_optim!((f32, 4));
            #[cfg(target_feature = "avx512vl")]
            portable_mask_tests_optim!((i32, 4));
        }
    }

    // --- SSE2 ---

    #[cfg(target_feature = "sse2")]
    #[test]
    fn sse2() {
        use safe_arch::{m128d, m128i};
        test_simd::<f64, 2, m128d>(f64::MIN, f64::MAX);
        test_simd::<i8, 16, m128i>(i8::MIN, i8::MAX);

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests!(
                (i8, 16),
                (u8, 16),
                (i16, 8),
                (u16, 8),
                (i32, 4),
                (u32, 4),
                (i64, 2),
                (u64, 2)
            );
            #[cfg(target_feature = "avx512vl")]
            portable_mask_tests!((i8, 16), (i16, 8), (i64, 2));
        }
    }

    #[cfg(target_feature = "sse2")]
    #[test]
    #[ignore]
    fn sse2_optim() {
        use safe_arch::{m128d, m128i};
        test_unoptimized_value_type::<m128d>();
        test_unoptimized_value_type::<m128i>();

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests_optim!(
                (i8, 16),
                (u8, 16),
                (i16, 8),
                (u16, 8),
                (i32, 4),
                (u32, 4),
                (i64, 2),
                (u64, 2)
            );
            #[cfg(target_feature = "avx512vl")]
            portable_mask_tests_optim!((i8, 16), (i16, 8), (i64, 2));
        }
    }

    // --- AVX ---

    #[cfg(target_feature = "avx")]
    #[test]
    fn avx() {
        use safe_arch::{m256, m256d};
        test_simd::<f32, 8, m256>(f32::MIN, f32::MAX);
        test_simd::<f64, 4, m256d>(f64::MIN, f64::MAX);

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests!((f32, 8), (f64, 4));
            #[cfg(target_feature = "avx512vl")]
            portable_mask_tests!((i32, 8), (i64, 4));
        }
    }

    #[cfg(target_feature = "avx")]
    #[test]
    #[ignore]
    fn avx_optim() {
        use safe_arch::{m256, m256d};
        test_unoptimized_value_type::<m256>();
        test_unoptimized_value_type::<m256d>();

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests_optim!((f32, 8), (f64, 4));
            #[cfg(target_feature = "avx512vl")]
            portable_mask_tests_optim!((i32, 8), (i64, 4));
        }
    }

    // --- AVX2 ---

    #[cfg(target_feature = "avx2")]
    #[test]
    fn avx2() {
        use safe_arch::m256i;
        test_simd::<i8, 32, m256i>(i8::MIN, i8::MAX);

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests!(
                (i8, 32),
                (u8, 32),
                (i16, 16),
                (u16, 16),
                (i32, 8),
                (u32, 8),
                (i64, 4),
                (u64, 4)
            );
            #[cfg(target_feature = "avx512vl")]
            {
                portable_mask_tests!((i16, 16));
                // The 32-lane mask of 8-bit elements also needs avx512bw
                #[cfg(target_feature = "avx512bw")]
                portable_mask_tests!((i8, 32));
            }
        }
    }

    #[cfg(target_feature = "avx2")]
    #[test]
    #[ignore]
    fn avx2_optim() {
        use safe_arch::m256i;
        test_unoptimized_value_type::<m256i>();

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests_optim!(
                (i8, 32),
                (u8, 32),
                (i16, 16),
                (u16, 16),
                (i32, 8),
                (u32, 8),
                (i64, 4),
                (u64, 4)
            );
            #[cfg(target_feature = "avx512vl")]
            {
                portable_mask_tests_optim!((i16, 16));
                // The 32-lane mask of 8-bit elements also needs avx512bw
                #[cfg(target_feature = "avx512bw")]
                portable_mask_tests_optim!((i8, 32));
            }
        }
    }

    // --- AVX-512F ---

    #[cfg(target_feature = "avx512f")]
    #[test]
    fn avx512f() {
        use safe_arch::{m512, m512d, m512i};
        test_simd::<i32, 16, m512i>(i32::MIN, i32::MAX);
        test_simd::<u32, 16, m512i>(u32::MIN, u32::MAX);
        test_simd::<f32, 16, m512>(f32::MIN, f32::MAX);
        test_simd::<i64, 8, m512i>(i64::MIN, i64::MAX);
        test_simd::<u64, 8, m512i>(u64::MIN, u64::MAX);
        test_simd::<f64, 8, m512d>(f64::MIN, f64::MAX);

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests!(
                (i32, 16),
                (u32, 16),
                (f32, 16),
                (i64, 8),
                (u64, 8),
                (f64, 8)
            );
            portable_mask_tests!((i32, 16), (i64, 8));
        }
    }

    #[cfg(target_feature = "avx512f")]
    #[test]
    #[ignore]
    fn avx512f_optim() {
        use safe_arch::{m512, m512d, m512i};
        test_unoptimized_value_type::<m512>();
        test_unoptimized_value_type::<m512d>();
        test_unoptimized_value_type::<m512i>();

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests_optim!(
                (i32, 16),
                (u32, 16),
                (f32, 16),
                (i64, 8),
                (u64, 8),
                (f64, 8)
            );
            portable_mask_tests_optim!((i32, 16), (i64, 8));
        }
    }

    // --- AVX-512BW ---

    #[cfg(target_feature = "avx512bw")]
    #[test]
    fn avx512bw() {
        use safe_arch::m512i;
        test_simd::<i8, 64, m512i>(i8::MIN, i8::MAX);
        test_simd::<u8, 64, m512i>(u8::MIN, u8::MAX);
        test_simd::<i16, 32, m512i>(i16::MIN, i16::MAX);
        test_simd::<u16, 32, m512i>(u16::MIN, u16::MAX);

        #[cfg(feature = "nightly")]
        {
            portable_simd_tests!((i8, 64), (u8, 64), (i16, 32), (u16, 32));
            portable_mask_tests!((i8, 64), (i16, 32));
        }
    }

    // This test only covers portable SIMD types, hence the whole submodule
    // being gated on the "nightly" feature
    #[cfg(all(feature = "nightly", target_feature = "avx512bw"))]
    mod avx512 {
        use super::*;

        #[test]
        #[ignore]
        fn avx512bw_optim() {
            portable_simd_tests_optim!((i8, 64), (u8, 64), (i16, 32), (u16, 32));
            portable_mask_tests_optim!((i8, 64), (i16, 32));
        }
    }

    // --- CPUID ---

    #[test]
    fn cpuid_result() {
        // Build a result whose four registers all hold the same value
        let uniform = |word: u32| CpuidResult {
            eax: word,
            ebx: word,
            ecx: word,
            edx: word,
        };
        test_value(uniform(0));
        test_value(uniform(u32::MAX));
    }

    #[test]
    #[ignore]
    fn cpuid_result_optim() {
        let zeroed = CpuidResult {
            eax: 0,
            ebx: 0,
            ecx: 0,
            edx: 0,
        };
        test_unoptimized_value(zeroed);
    }
}