1use std::sync::LazyLock;
5
/// The SIMD instruction-set level selected for a feature.
///
/// The variants name instruction-set families from different CPU
/// architectures; which of them can be produced depends on the target the
/// crate is compiled for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SimdSupport {
    /// No usable SIMD support was detected.
    None,
    /// NEON (aarch64).
    Neon,
    /// SSE (x86_64).
    Sse,
    /// AVX2 (x86_64).
    Avx2,
    /// AVX-512 (x86_64).
    Avx512,
    /// LSX (loongarch64).
    Lsx,
    /// LASX (loongarch64).
    Lasx,
}
16
17pub static FP16_SIMD_SUPPORT: LazyLock<SimdSupport> = LazyLock::new(|| {
19 #[cfg(target_arch = "aarch64")]
20 {
21 if aarch64::has_neon_f16_support() {
22 SimdSupport::Neon
23 } else {
24 SimdSupport::None
25 }
26 }
27 #[cfg(target_arch = "x86_64")]
28 {
29 if x86::has_avx512_f16_support() {
30 SimdSupport::Avx512
31 } else if is_x86_feature_detected!("avx2") {
32 SimdSupport::Avx2
33 } else {
34 SimdSupport::None
35 }
36 }
37 #[cfg(target_arch = "loongarch64")]
38 {
39 if loongarch64::has_lasx_support() {
40 SimdSupport::Lasx
41 } else if loongarch64::has_lsx_support() {
42 SimdSupport::Lsx
43 } else {
44 SimdSupport::None
45 }
46 }
47});
48
#[cfg(target_arch = "x86_64")]
mod x86 {
    use core::arch::x86_64::__cpuid;

    /// Bit position inside EDX of CPUID leaf 7 that this module tests for
    /// AVX-512 FP16.
    const AVX512_FP16_EDX_BIT: u32 = 23;

    /// Returns `true` when AVX-512 FP16 can be used on the running CPU.
    ///
    /// The answer is `false` unless `avx512f` is detected first; only then is
    /// CPUID leaf 7 queried and the FP16 bit of EDX inspected.
    pub fn has_avx512_f16_support() -> bool {
        if is_x86_feature_detected!("avx512f") {
            // The `unsafe` block is kept for toolchains where `__cpuid` is an
            // unsafe intrinsic.
            let leaf7 = unsafe { __cpuid(7) };
            let edx_bits = leaf7.edx as usize;
            edx_bits & (1 << AVX512_FP16_EDX_BIT) != 0
        } else {
            false
        }
    }
}
71
#[cfg(all(target_arch = "aarch64", target_os = "macos"))]
mod aarch64 {
    /// Hard-coded answer for aarch64 macOS; no runtime probe is performed.
    // NOTE(review): presumably every aarch64 macOS machine has FP16 NEON —
    // confirm.
    const FP16_NEON_AVAILABLE: bool = true;

    /// Reports whether FP16 NEON instructions may be used.
    ///
    /// On aarch64 macOS this always returns `true`.
    pub fn has_neon_f16_support() -> bool {
        FP16_NEON_AVAILABLE
    }
}
83
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
mod aarch64 {
    /// Reports whether FP16 NEON instructions may be used.
    ///
    /// Reads the `AT_HWCAP` auxiliary-vector entry and returns `true` when the
    /// `HWCAP_FPHP` bit is set.
    // NOTE(review): `HWCAP_FPHP` is tested rather than `HWCAP_ASIMDHP`;
    // confirm that this is the intended capability bit for NEON FP16.
    pub fn has_neon_f16_support() -> bool {
        // SAFETY: `getauxval` is called with a constant key and takes no
        // pointers.
        let hwcap = unsafe { libc::getauxval(libc::AT_HWCAP) };
        let fp16_bits = hwcap & libc::HWCAP_FPHP;
        fp16_bits != 0
    }
}
92
#[cfg(all(target_arch = "aarch64", target_os = "windows"))]
mod aarch64 {
    /// Hard-coded answer for aarch64 Windows; no runtime probe is performed.
    const FP16_NEON_AVAILABLE: bool = false;

    /// Reports whether FP16 NEON instructions may be used.
    ///
    /// On aarch64 Windows this always returns `false`.
    pub fn has_neon_f16_support() -> bool {
        FP16_NEON_AVAILABLE
    }
}
100
#[cfg(target_arch = "loongarch64")]
mod loongarch64 {
    /// Returns `true` when any bit of `mask` is set in the `AT_HWCAP`
    /// auxiliary-vector entry.
    fn hwcap_has(mask: libc::c_ulong) -> bool {
        // SAFETY: `getauxval` is called with a constant key and takes no
        // pointers.
        let hwcap = unsafe { libc::getauxval(libc::AT_HWCAP) };
        (hwcap & mask) != 0
    }

    /// Reports whether the `HWCAP_LOONGARCH_LSX` capability bit is set.
    pub fn has_lsx_support() -> bool {
        hwcap_has(libc::HWCAP_LOONGARCH_LSX)
    }

    /// Reports whether the `HWCAP_LOONGARCH_LASX` capability bit is set.
    pub fn has_lasx_support() -> bool {
        hwcap_has(libc::HWCAP_LOONGARCH_LASX)
    }
}
114
#[cfg(all(target_arch = "aarch64", target_os = "android"))]
mod aarch64 {
    /// Hard-coded answer for aarch64 Android; no runtime probe is performed.
    const FP16_NEON_AVAILABLE: bool = false;

    /// Reports whether FP16 NEON instructions may be used.
    ///
    /// On aarch64 Android this always returns `false`.
    pub fn has_neon_f16_support() -> bool {
        FP16_NEON_AVAILABLE
    }
}