//! llml_simd — crate root (`lib.rs`).

// Nightly features — presumably used by the backend macros/modules included
// below (`concat_idents` for generated identifiers, `exclusive_range_pattern`
// for `a..b` match patterns); TODO confirm against the backend sources.
#![feature(concat_idents, exclusive_range_pattern)]
// Nightly gate needed only when the SSE backend is compiled in.
#![cfg_attr(target_feature = "sse", feature(stdarch_x86_mm_shuffle))]
// Nightly gate for 64-bit WASM SIMD intrinsics on wasm32 targets.
#![cfg_attr(target_arch = "wasm32", feature(simd_wasm64))]
// The crate is `no_std` unless the `use_std` feature is enabled.
#![cfg_attr(not(feature = "use_std"), no_std)]
5
/// Declares each listed module and re-exports its entire contents,
/// flattening them into the invoking namespace.
/// (Not invoked in this file; presumably used by the included/backend
/// sources — confirm.)
macro_rules! flat_mod {
    ($($i:ident),+) => {
        $(
            mod $i;
            pub use $i::*;
        )*
    };
}
14
/// Re-exports the named items from whichever backend module is active for
/// the current target. The priority order must mirror the module-selection
/// `cfg_if!` block later in this file: `force_naive` overrides everything,
/// then x86/x86_64 SSE, arm/aarch64 NEON, wasm32 simd128, and finally the
/// naive array fallback.
macro_rules! import {
    ($($i:ident),+) => {
        cfg_if::cfg_if! {
            if #[cfg(feature = "force_naive")] {
                $(pub use crate::naive::$i;)*
            } else if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse"))] {
                $(pub use crate::x86::$i;)*
            } else if #[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "neon"))] {
                $(pub use crate::arm::$i;)*
            } else if #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] {
                $(pub use crate::wasm::$i;)*
            } else {
                $(pub use crate::naive::$i;)*
            }
        }
    };
}
32
// Textual inclusion (rather than `mod`) so the included code shares this
// file's scope, including the `flat_mod!`/`import!` macros defined above.
include!("composite.rs");

// Optional integration, compiled only with the `random` feature.
#[cfg(feature = "random")]
include!("generics/random.rs");

// Optional integration, compiled only with the `serialize` feature.
#[cfg(feature = "serialize")]
include!("generics/serialize.rs");
40
// Compile-time backend selection. The priority order here must stay in sync
// with the `import!` macro above: `force_naive` overrides everything, then
// x86/x86_64 SSE, arm/aarch64 NEON, wasm32 simd128, else the naive fallback.
// Only the SIMD backends pull in the shared float generics; the naive
// backend presumably provides those itself — NOTE(review): confirm.
cfg_if::cfg_if! {
    if #[cfg(feature = "force_naive")] {
        mod naive;
    } else if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse"))] {
        mod x86;
        include!("generics/float.rs");
    } else if #[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "neon"))] {
        mod arm;
        include!("generics/float.rs");
    } else if #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] {
        mod wasm;
        include!("generics/float.rs");
    } else {
        mod naive;
    }
}
57
/// Floating-point vectors
pub mod float {
    /// Single-precision floating point vectors
    ///
    /// Re-exported from the active backend via `import!`: `f32` vectors of
    /// 2 through 16 lanes, in steps of two.
    pub mod single {
        import!(f32x2, f32x4, f32x6, f32x8, f32x10, f32x12, f32x14, f32x16);
    }

    /// Double-precision floating point vectors
    ///
    /// Re-exported from the active backend via `import!`: `f64` vectors of
    /// 2 through 16 lanes, in steps of two.
    pub mod double {
        import!(f64x2, f64x4, f64x6, f64x8, f64x10, f64x12, f64x14, f64x16);
    }
}
70
/// The SIMD implementation backing the vector types, selected at compile
/// time (see [`current_impl`] and [`LlmlImpl::CURRENT`]).
///
/// Derives `Debug`/`Clone`/`Copy`/`PartialEq`/`Eq`/`Hash` so callers can
/// actually inspect, compare, and store the reported implementation; the
/// enum carries no data, so all of these are trivially correct.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LlmlImpl {
    /// x86/x86_64 SSE (128-bit) implementation
    SSE,

    /// x86/x86_64 AVX (128-bit to 256-bit) implementation
    AVX,

    /// arm/aarch64 NEON (64-bit to 128-bit) implementation
    NEON,

    /// WASM32 SIMD128 proposal (128-bit) implementation
    WASM,

    /// Naive implementation with arrays. Useful as a backup if no other method is available
    NAIVE,
}
88
89impl LlmlImpl {
90    pub const CURRENT: Self = current_impl();
91
92    #[inline]
93    pub const fn is_64bit(&self) -> bool {
94        matches!(self, LlmlImpl::NEON)
95    }
96
97    #[inline]
98    pub const fn is_128bit(&self) -> bool {
99        match self {
100            LlmlImpl::NAIVE => false,
101            _ => true,
102        }
103    }
104
105    #[inline]
106    pub const fn is_256bit(&self) -> bool {
107        matches!(self, LlmlImpl::AVX)
108    }
109}
110
111#[inline]
112pub const fn current_impl() -> LlmlImpl {
113    cfg_if::cfg_if! {
114        if #[cfg(feature = "force_naive")] {
115            LlmlImpl::NAIVE
116        } else if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse"))] {
117            #[cfg(all(feature = "use_avx", target_feature = "avx"))]
118            return LlmlImpl::AVX;
119            #[cfg(not(all(feature = "use_avx", target_feature = "avx")))]
120            LlmlImpl::SSE
121        } else if #[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "neon"))] {
122            LlmlImpl::NEON
123        } else if #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] {
124            LlmlImpl::WASM
125        } else {
126            LlmlImpl::NAIVE
127        }
128    }
129}