use std::sync::atomic::{AtomicU64, Ordering};
use super::{Scalar, Target, Target1, Target2, Target3};
mod algorithms;
mod common;
mod macros;
pub mod v3;
pub mod v4;
pub use v3::V3;
pub use v4::V4;
/// Internal tag identifying one of the architecture levels this module knows about.
///
/// The derived `PartialOrd` compares variants in declaration order, so
/// `Scalar < V3 < V4`. Reordering the variants changes every comparison.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub(super) enum LevelInner {
/// Tag for the `Scalar` architecture (presumably the portable fallback — confirm).
Scalar,
/// Tag for the `V3` architecture; compares greater than `Scalar`.
V3,
/// Tag for the `V4` architecture; compares greater than both `Scalar` and `V3`.
V4,
}
// Compile-time selection of `Current` / `current()` from the target features
// that are statically enabled for this build. Branches are tried in order:
// `V4` first, then `V3`, then the `Scalar` fallback.
cfg_if::cfg_if! {
if #[cfg(all(
target_feature = "avx512f",
target_feature = "avx512bw",
target_feature = "avx512cd",
target_feature = "avx512dq",
target_feature = "avx512vl",
target_feature = "avx512vnni",
// target_feature = "avx512bitalg",
// target_feature = "avx512vpopcntdq",
))] {
/// Architecture chosen at compile time; `V4` here because every AVX-512
/// feature in the enclosing `cfg` is statically enabled.
pub type Current = V4;
/// Returns an instance of the compile-time selected architecture ([`Current`]).
pub const fn current() -> Current {
// SAFETY: presumably `V4::new` requires the target features checked by the
// enclosing `cfg`, which hold for every CPU this build may run on.
// NOTE(review): this `cfg` does not list the bmi1/bmi2/lzcnt/movbe/xsave
// features that the V3 branch checks — confirm against `V4::new`'s contract.
unsafe { V4::new() }
}
} else if #[cfg(all(
target_feature = "avx2",
target_feature = "bmi1",
target_feature = "bmi2",
target_feature = "f16c",
target_feature = "fma",
target_feature = "lzcnt",
target_feature = "movbe",
target_feature = "xsave",
not(doc),
))] {
// `not(doc)` above keeps this branch out of rustdoc builds.
// NOTE(review): the V4 branch has no matching `not(doc)` guard — confirm
// whether that asymmetry is intentional.
/// Architecture chosen at compile time; `V3` here because every feature in
/// the enclosing `cfg` is statically enabled (and this is not a doc build).
pub type Current = V3;
/// Returns an instance of the compile-time selected architecture ([`Current`]).
pub const fn current() -> Current {
// SAFETY: presumably `V3::new` requires the target features checked by the
// enclosing `cfg`, which hold for every CPU this build may run on —
// TODO confirm against `V3::new`'s contract.
unsafe { V3::new() }
}
} else {
/// Architecture chosen at compile time; `Scalar` because neither feature
/// set above is statically enabled.
pub type Current = Scalar;
/// Returns an instance of the compile-time selected architecture ([`Current`]).
pub const fn current() -> Current {
Scalar::new()
}
}
}
/// Process-wide cache of the architecture code chosen at runtime.
///
/// Starts at `ARCH_UNINITIALIZED` and is overwritten by `resolve_architecture`
/// with one of the other `ARCH_*` codes. Every access in this file uses
/// `Ordering::Relaxed`.
static ARCH_NUMBER: AtomicU64 = AtomicU64::new(ARCH_UNINITIALIZED);
/// Sentinel stored in `ARCH_NUMBER` before any level has been resolved.
const ARCH_UNINITIALIZED: u64 = 0;
/// Code returned by `arch_number` when the V3 feature set is not fully detected.
const ARCH_SCALAR: u64 = 1;
/// Code returned by `arch_number` when the V3 feature set is detected but the
/// AVX-512 set is not.
const ARCH_V3: u64 = 2;
/// Code returned by `arch_number` when both the V3 and AVX-512 feature sets
/// are detected.
const ARCH_V4: u64 = 3;
/// Expands to an expression that yields the cached architecture code (`u64`).
///
/// The expansion reads `ARCH_NUMBER` with relaxed ordering. If the value is
/// still `ARCH_UNINITIALIZED`, it calls `resolve_architecture()` and yields
/// that function's return value instead. Every item is named through
/// `$crate::arch::x86_64::...`.
macro_rules! get_or_set_architecture {
    () => {{
        use std::sync::atomic::Ordering;
        let cached = $crate::arch::x86_64::ARCH_NUMBER.load(Ordering::Relaxed);
        if cached != $crate::arch::x86_64::ARCH_UNINITIALIZED {
            // Already resolved: hand back the cached code.
            cached
        } else {
            // Nothing cached yet: run detection, which also stores the result.
            $crate::arch::x86_64::resolve_architecture()
        }
    }};
}
// Re-export so the macro can be named by path from the parent module.
pub(super) use get_or_set_architecture;
/// Queries the CPU at runtime and returns the matching `ARCH_*` code.
///
/// * `ARCH_SCALAR` — at least one of the V3 baseline features (`avx2`, `avx`,
///   `bmi1`, `bmi2`, `f16c`, `fma`, `lzcnt`, `movbe`, `xsave`) is missing.
/// * `ARCH_V3` — the whole baseline is present, but at least one of
///   `avx512f`, `avx512bw`, `avx512cd`, `avx512dq`, `avx512vl`, `avx512vnni`
///   is missing.
/// * `ARCH_V4` — the baseline and all six AVX-512 features are present.
///
/// This function only detects; it does not touch `ARCH_NUMBER`.
fn arch_number() -> u64 {
    let has_v3_baseline = is_x86_feature_detected!("avx2")
        && is_x86_feature_detected!("avx")
        && is_x86_feature_detected!("bmi1")
        && is_x86_feature_detected!("bmi2")
        && is_x86_feature_detected!("f16c")
        && is_x86_feature_detected!("fma")
        && is_x86_feature_detected!("lzcnt")
        && is_x86_feature_detected!("movbe")
        && is_x86_feature_detected!("xsave");
    if !has_v3_baseline {
        return ARCH_SCALAR;
    }

    // AVX-512 is only probed once the V3 baseline is known to be present.
    let has_avx512 = is_x86_feature_detected!("avx512f")
        && is_x86_feature_detected!("avx512bw")
        && is_x86_feature_detected!("avx512cd")
        && is_x86_feature_detected!("avx512dq")
        && is_x86_feature_detected!("avx512vl")
        && is_x86_feature_detected!("avx512vnni");
    if has_avx512 {
        ARCH_V4
    } else {
        ARCH_V3
    }
}
/// Runs runtime detection, caches the result in `ARCH_NUMBER`, and returns it.
///
/// The value written (with relaxed ordering) is exactly what `arch_number()`
/// reported. Marked `#[inline(never)]`, so this path stays out of line.
#[inline(never)]
fn resolve_architecture() -> u64 {
    let detected = arch_number();
    // Publish the code so later relaxed loads of `ARCH_NUMBER` can reuse it.
    ARCH_NUMBER.store(detected, Ordering::Relaxed);
    detected
}
/// Generates one family of runtime-dispatch functions for a fixed argument count.
///
/// Arguments, in order:
/// * `$name` — public entry point that enters `V4`/`V3` through `$method`.
/// * `$resolve` — private out-of-line helper used by `$name` while
///   `ARCH_NUMBER` is still `ARCH_UNINITIALIZED`.
/// * `$name_no_features` — public entry point that always calls `f.run`.
/// * `$resolve_no_features` — private out-of-line helper for `$name_no_features`.
/// * `$target` — trait `f` must implement for `V4`, `V3` and `Scalar`.
/// * `$method` — `unsafe` method invoked on the `V4`/`V3` instance by `$name`.
/// * `{ $x ... }` / `{ $A ... }` — names and generic types of the extra
///   arguments forwarded to `f`.
macro_rules! impl_dispatch {
    (
        $name:ident,
        $resolve:ident,
        $name_no_features:ident,
        $resolve_no_features:ident,
        $target:ident,
        $method:ident,
        { $($x:ident )* },
        { $($A:ident )* }
    ) => {
        /// Runs `f` for the architecture level cached in `ARCH_NUMBER`.
        ///
        /// An unresolved cache is filled in by the out-of-line resolver first.
        /// `V4` and `V3` are entered through their `unsafe` runner method; any
        /// other cached value calls `f.run` with `Scalar`. The sibling function
        /// that always calls `f.run` directly is
        #[doc = concat!("`", stringify!($name_no_features), "`.")]
        #[inline]
        pub fn $name<T, R, $($A,)*>(f: T, $($x: $A,)*) -> R
        where
            T: $target<V4, R, $($A,)*>
                + $target<V3, R, $($A,)*>
                + $target<Scalar, R, $($A,)*>,
        {
            match ARCH_NUMBER.load(Ordering::Relaxed) {
                ARCH_UNINITIALIZED => $resolve(f, $($x,)*),
                // SAFETY: presumably sound because, outside of tests, this file
                // only stores `ARCH_V4` after `arch_number()` detected the
                // features at runtime — confirm against the contracts of
                // `V4::new` and the runner method.
                ARCH_V4 => unsafe { V4::new().$method(f, $($x,)*) },
                // SAFETY: as above, for `ARCH_V3` / `V3` — TODO confirm.
                ARCH_V3 => unsafe { V3::new().$method(f, $($x,)*) },
                _ => f.run(Scalar::new(), $($x,)*),
            }
        }

        /// Runs `f` for the architecture level cached in `ARCH_NUMBER`, always
        /// by calling `f.run` with the matching architecture value.
        ///
        /// An unresolved cache is filled in by the out-of-line resolver first.
        #[inline]
        pub fn $name_no_features<T, R, $($A,)*>(f: T, $($x: $A,)*) -> R
        where
            T: $target<V4, R, $($A,)*>
                + $target<V3, R, $($A,)*>
                + $target<Scalar, R, $($A,)*>,
        {
            match ARCH_NUMBER.load(Ordering::Relaxed) {
                ARCH_UNINITIALIZED => $resolve_no_features(f, $($x,)*),
                // SAFETY: presumably sound because, outside of tests, this file
                // only stores `ARCH_V4` after runtime detection — confirm
                // against `V4::new`'s contract.
                ARCH_V4 => f.run(unsafe { V4::new() }, $($x,)*),
                // SAFETY: as above, for `ARCH_V3` / `V3` — TODO confirm.
                ARCH_V3 => f.run(unsafe { V3::new() }, $($x,)*),
                _ => f.run(Scalar::new(), $($x,)*),
            }
        }

        // Slow path, kept out of line: resolve the level, then go back through
        // the public entry point, which re-reads the now-populated cache.
        #[inline(never)]
        fn $resolve<T, R, $($A,)*>(f: T, $($x: $A,)*) -> R
        where
            T: $target<V4, R, $($A,)*>
                + $target<V3, R, $($A,)*>
                + $target<Scalar, R, $($A,)*>,
        {
            resolve_architecture();
            $name(f, $($x,)*)
        }

        // Slow path for the `_no_features` entry point; same shape as above.
        #[inline(never)]
        fn $resolve_no_features<T, R, $($A,)*>(f: T, $($x: $A,)*) -> R
        where
            T: $target<V4, R, $($A,)*>
                + $target<V3, R, $($A,)*>
                + $target<Scalar, R, $($A,)*>,
        {
            resolve_architecture();
            $name_no_features(f, $($x,)*)
        }
    }
}
// Instantiate the dispatch family once per arity (0 to 3 extra arguments).
// Each invocation names, in order: the public dispatcher, its private resolve
// helper, the public `_no_features` dispatcher, its private resolve helper,
// the `Target*` trait bound, the `run_with*` method called on `V3`/`V4`, and
// the argument / generic-type name lists.

// Zero extra arguments: `dispatch(f)` / `dispatch_no_features(f)`.
impl_dispatch!(
dispatch,
dispatch_resolve,
dispatch_no_features,
dispatch_resolve_no_features,
Target,
run_with,
{},
{}
);
// One extra argument: `dispatch1(f, x0)` / `dispatch1_no_features(f, x0)`.
impl_dispatch!(
dispatch1,
dispatch_resolve1,
dispatch1_no_features,
dispatch_resolve1_no_features,
Target1,
run_with_1,
{ x0 },
{ T0 }
);
// Two extra arguments: `dispatch2(f, x0, x1)` / `dispatch2_no_features(f, x0, x1)`.
impl_dispatch!(
dispatch2,
dispatch_resolve2,
dispatch2_no_features,
dispatch_resolve2_no_features,
Target2,
run_with_2,
{ x0 x1 },
{ T0 T1 }
);
// Three extra arguments: `dispatch3(f, x0, x1, x2)` / `dispatch3_no_features(f, x0, x1, x2)`.
impl_dispatch!(
dispatch3,
dispatch_resolve3,
dispatch3_no_features,
dispatch_resolve3_no_features,
Target3,
run_with_3,
{ x0 x1 x2 },
{ T0 T1 T2 }
);
/// Test-only cache of the architecture code returned by `test_arch_number`.
///
/// Kept separate from `ARCH_NUMBER`; starts at `ARCH_UNINITIALIZED` and is
/// filled in on the first `test_arch_number` call.
#[cfg(test)]
static TEST_ARCH_NUMBER: AtomicU64 = AtomicU64::new(ARCH_UNINITIALIZED);
/// Returns the architecture code tests should target, caching it in
/// `TEST_ARCH_NUMBER` after the first call.
///
/// The code comes from `crate::get_test_arch()`:
/// * `Some("all")` or `Some("x86-64-v4")` → `ARCH_V4`
/// * `Some("x86-64-v3")` → `ARCH_V3`
/// * `Some("scalar")` → `ARCH_SCALAR`
/// * `None` → whatever `arch_number()` detects on the running CPU
///
/// # Panics
///
/// Panics on any other `Some(..)` value.
#[cfg(test)]
#[inline(never)]
pub(super) fn test_arch_number() -> u64 {
    let cached = TEST_ARCH_NUMBER.load(Ordering::Relaxed);
    if cached != ARCH_UNINITIALIZED {
        return cached;
    }

    let resolved = match crate::get_test_arch() {
        // No explicit request: fall back to runtime detection.
        None => arch_number(),
        Some(arch) if arch == "all" || arch == "x86-64-v4" => ARCH_V4,
        Some(arch) if arch == "x86-64-v3" => ARCH_V3,
        Some(arch) if arch == "scalar" => ARCH_SCALAR,
        Some(arch) => panic!("Unrecognized test architecture: \"{arch}\""),
    };
    TEST_ARCH_NUMBER.store(resolved, Ordering::Relaxed);
    resolved
}
// Unit tests for the dispatch functions, `run`, and level ordering.
#[cfg(test)]
mod tests {
use super::*;
use crate::Architecture;
// Probe operation: each `Target*` impl returns a string naming the
// architecture it was invoked with, followed by any forwarded arguments.
struct TestOp;
// --- `Scalar` implementations -------------------------------------------
impl Target<Scalar, &'static str> for TestOp {
fn run(self, _: Scalar) -> &'static str {
"scalar"
}
}
impl Target1<Scalar, String, &str> for TestOp {
fn run(self, _: Scalar, x0: &str) -> String {
format!("scalar: {}", x0)
}
}
impl Target2<Scalar, String, &str, &str> for TestOp {
fn run(self, _: Scalar, x0: &str, x1: &str) -> String {
format!("scalar: {}, {}", x0, x1)
}
}
impl Target3<Scalar, String, &str, &str, &str> for TestOp {
fn run(self, _: Scalar, x0: &str, x1: &str, x2: &str) -> String {
format!("scalar: {}, {}, {}", x0, x1, x2)
}
}
// --- `V3` implementations -----------------------------------------------
impl Target<V3, &'static str> for TestOp {
fn run(self, _: V3) -> &'static str {
"v3"
}
}
impl Target1<V3, String, &str> for TestOp {
fn run(self, _: V3, x0: &str) -> String {
format!("v3: {}", x0)
}
}
impl Target2<V3, String, &str, &str> for TestOp {
fn run(self, _: V3, x0: &str, x1: &str) -> String {
format!("v3: {}, {}", x0, x1)
}
}
impl Target3<V3, String, &str, &str, &str> for TestOp {
// Only the three-argument V3/V4 impls are marked `#[inline(never)]`.
#[inline(never)]
fn run(self, _: V3, x0: &str, x1: &str, x2: &str) -> String {
format!("v3: {}, {}, {}", x0, x1, x2)
}
}
// --- `V4` implementations -----------------------------------------------
impl Target<V4, &'static str> for TestOp {
fn run(self, _: V4) -> &'static str {
"v4"
}
}
impl Target1<V4, String, &str> for TestOp {
fn run(self, _: V4, x0: &str) -> String {
format!("v4: {}", x0)
}
}
impl Target2<V4, String, &str, &str> for TestOp {
fn run(self, _: V4, x0: &str, x1: &str) -> String {
format!("v4: {}, {}", x0, x1)
}
}
impl Target3<V4, String, &str, &str, &str> for TestOp {
#[inline(never)]
fn run(self, _: V4, x0: &str, x1: &str, x2: &str) -> String {
format!("v4: {}, {}, {}", x0, x1, x2)
}
}
// Forces `ARCH_NUMBER` to each level in turn and checks that `new_checked`
// and every `dispatch*` function follow the forced value.
//
// NOTE(review): this test overwrites the process-wide `ARCH_NUMBER`; tests
// running concurrently in the same process could observe the forced values —
// confirm this is acceptable.
// NOTE(review): the V3/V4 sections call `dispatch*` with a forced level
// regardless of what `arch_number()` would detect — presumably the tests are
// only run on capable hardware or under emulation; confirm.
#[test]
fn test_dispatch() {
// The assertions below expect `new_checked` to follow the cached level:
// V4 cached => both V4 and V3 are available.
ARCH_NUMBER.store(ARCH_V4, Ordering::Relaxed);
assert!(V4::new_checked().is_some());
assert!(V3::new_checked().is_some());
// V3 cached => only V3 is available.
ARCH_NUMBER.store(ARCH_V3, Ordering::Relaxed);
assert!(V4::new_checked().is_none());
assert!(V3::new_checked().is_some());
// Scalar cached => neither is available. The cache is left at
// `ARCH_SCALAR` for the scalar dispatch checks that follow.
ARCH_NUMBER.store(ARCH_SCALAR, Ordering::Relaxed);
assert!(V4::new_checked().is_none());
assert!(V3::new_checked().is_none());
assert_eq!(dispatch(TestOp), "scalar");
assert_eq!(dispatch1(TestOp, "foo"), "scalar: foo");
assert_eq!(dispatch2(TestOp, "foo", "bar"), "scalar: foo, bar");
assert_eq!(
dispatch3(TestOp, "foo", "bar", "baz"),
"scalar: foo, bar, baz",
);
assert_eq!(dispatch_no_features(TestOp), "scalar");
assert_eq!(dispatch1_no_features(TestOp, "foo"), "scalar: foo");
assert_eq!(
dispatch2_no_features(TestOp, "foo", "bar"),
"scalar: foo, bar"
);
assert_eq!(
dispatch3_no_features(TestOp, "foo", "bar", "baz"),
"scalar: foo, bar, baz",
);
// Same checks with the cache forced to V3.
ARCH_NUMBER.store(ARCH_V3, Ordering::Relaxed);
assert_eq!(dispatch(TestOp), "v3");
assert_eq!(dispatch1(TestOp, "foo"), "v3: foo");
assert_eq!(dispatch2(TestOp, "foo", "bar"), "v3: foo, bar");
assert_eq!(dispatch3(TestOp, "foo", "bar", "baz"), "v3: foo, bar, baz",);
assert_eq!(dispatch_no_features(TestOp), "v3");
assert_eq!(dispatch1_no_features(TestOp, "foo"), "v3: foo");
assert_eq!(dispatch2_no_features(TestOp, "foo", "bar"), "v3: foo, bar");
assert_eq!(
dispatch3_no_features(TestOp, "foo", "bar", "baz"),
"v3: foo, bar, baz",
);
// Same checks with the cache forced to V4.
ARCH_NUMBER.store(ARCH_V4, Ordering::Relaxed);
assert_eq!(dispatch(TestOp), "v4");
assert_eq!(dispatch1(TestOp, "foo"), "v4: foo");
assert_eq!(dispatch2(TestOp, "foo", "bar"), "v4: foo, bar");
assert_eq!(dispatch3(TestOp, "foo", "bar", "baz"), "v4: foo, bar, baz",);
assert_eq!(dispatch_no_features(TestOp), "v4");
assert_eq!(dispatch1_no_features(TestOp, "foo"), "v4: foo");
assert_eq!(dispatch2_no_features(TestOp, "foo", "bar"), "v4: foo, bar");
assert_eq!(
dispatch3_no_features(TestOp, "foo", "bar", "baz"),
"v4: foo, bar, baz",
);
// Reset the cache and exercise the resolve slow path of both zero-argument
// dispatchers. The result depends on the host CPU, so it is not asserted.
ARCH_NUMBER.store(ARCH_UNINITIALIZED, Ordering::Relaxed);
let _ = dispatch(TestOp);
ARCH_NUMBER.store(ARCH_UNINITIALIZED, Ordering::Relaxed);
let _ = dispatch_no_features(TestOp);
}
// Checks that `run` executes the closure once (observable through the captured
// counter) and returns its value. The body is skipped when
// `V3::new_checked_uncached()` returns `None`.
#[test]
fn test_run() {
if let Some(arch) = V3::new_checked_uncached() {
let mut x = 10;
let y: &str = arch.run(|| {
x += 10;
"foo"
});
assert_eq!(x, 20);
assert_eq!(y, "foo");
}
}
// Checks that the values returned by `level()` are strictly ordered
// Scalar < V3 < V4 and compare equal to themselves.
#[test]
fn test_level_ordering() {
// Also imported at module level above.
use crate::Architecture;
let scalar = Scalar::level();
let v3 = V3::level();
let v4 = V4::level();
assert!(scalar < v3);
assert!(scalar < v4);
assert!(v3 < v4);
assert!(v4 > v3);
assert!(v4 > scalar);
assert!(v3 > scalar);
assert_eq!(scalar, Scalar::level());
assert_eq!(v3, V3::level());
assert_eq!(v4, V4::level());
assert_ne!(scalar, v3);
assert_ne!(scalar, v4);
assert_ne!(v3, v4);
}
}