1#![cfg_attr(feature = "no-std", no_std)]
12#![allow(clippy::ifs_same_cond)]
17#![allow(clippy::nonminimal_bool)]
18#![allow(clippy::eq_op)]
19
20#[cfg(feature = "no-std")]
21extern crate alloc;
22
/// No-op stand-in for `println!` used when the `no-std` feature is enabled.
///
/// Accepts any token sequence and expands to an empty block, so the
/// arguments are discarded without being evaluated and nothing is printed.
#[cfg(feature = "no-std")]
#[macro_export]
macro_rules! println {
    ($($tokens:tt)*) => {{}};
}
29
/// No-op stand-in for `eprintln!` used when the `no-std` feature is enabled.
///
/// Accepts any token sequence and expands to an empty block, so the
/// arguments are discarded without being evaluated and nothing is written.
#[cfg(feature = "no-std")]
#[macro_export]
macro_rules! eprintln {
    ($($tokens:tt)*) => {{}};
}
35
/// Runtime CPU feature check for x86/x86_64 builds that link `std`.
///
/// Forwards the feature token (e.g. `"avx2"`) to
/// `std::arch::is_x86_feature_detected!` and evaluates to its `bool` result.
///
/// The forwarded path is written as `::std::...` because this macro is
/// exported: a relative `std::...` path would be resolved at the call site
/// and could be captured by an unrelated item named `std` there.
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    not(feature = "no-std")
))]
#[macro_export]
macro_rules! simd_feature_detected {
    ($feature:tt) => {
        ::std::arch::is_x86_feature_detected!($feature)
    };
}
49
/// Stub of `simd_feature_detected!` for x86/x86_64 builds with the `no-std`
/// feature enabled.
///
/// The feature token is accepted but ignored; the macro always expands to
/// `false`, so every x86 feature is reported as unavailable in this
/// configuration.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "no-std"))]
#[macro_export]
#[allow(unused_macros)]
macro_rules! simd_feature_detected {
    ($name:tt) => {
        false
    };
}
58
/// Stub of `simd_feature_detected!` for every target that is neither x86
/// nor x86_64.
///
/// The feature token is accepted but ignored; the macro always expands to
/// `false`, so x86 feature queries report "unavailable" on these targets.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
#[macro_export]
#[allow(unused_macros)]
macro_rules! simd_feature_detected {
    ($name:tt) => {
        false
    };
}
67
68pub mod activation;
71pub mod adaptive_optimization;
72pub mod advanced_optimizations;
73pub mod allocator;
74pub mod approximate;
75pub mod audio_processing;
76pub mod batch_operations;
77pub mod benchmark_framework;
78pub mod bit_operations;
79pub mod clustering;
80pub mod comprehensive_benchmarks;
81pub mod compression;
82pub mod custom_accelerator;
83pub mod distance;
84pub mod distributions;
85pub mod energy_benchmarks;
86pub mod error_correction;
87pub mod external_integration;
88pub mod fluent;
89pub mod fpga;
90pub mod gpu;
91pub mod gpu_memory;
92pub mod half_precision;
93pub mod image_processing;
94pub mod intrinsics;
95pub mod kernels;
96pub mod loss;
97pub mod matrix;
98pub mod memory;
99pub mod middleware;
100pub mod multi_gpu;
101pub mod neuromorphic;
102#[cfg(feature = "no-std")]
103pub mod no_std;
104pub mod optimization;
105pub mod optimization_hints;
106pub mod performance_hooks;
107pub mod performance_monitor;
108pub mod plugin_architecture;
109pub mod profiling;
110pub mod quantum;
111pub mod reduction;
112pub mod regression;
113#[cfg(target_arch = "riscv64")]
114pub mod riscv_vector;
115pub mod safe_simd;
116pub mod safety;
117pub mod search;
118pub mod signal_processing;
119pub mod sorting;
120pub mod target;
121pub mod tpu;
122pub mod traits;
123pub mod validation;
124pub mod vector;
125
126pub use clustering::LinkageType;
128
/// Snapshot of the SIMD instruction-set features available on the current
/// target, as filled in by `SimdCapabilities::detect`.
///
/// Every field is plain data, so the type is `Copy` and can be compared with
/// `==` (useful for caching and for tests that build a capability set by
/// hand).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SimdCapabilities {
    /// x86 SSE is available.
    pub sse: bool,
    /// x86 SSE2 is available.
    pub sse2: bool,
    /// x86 SSE3 is available.
    pub sse3: bool,
    /// x86 SSSE3 is available.
    pub ssse3: bool,
    /// x86 SSE4.1 is available.
    pub sse41: bool,
    /// x86 SSE4.2 is available.
    pub sse42: bool,
    /// x86 AVX is available.
    pub avx: bool,
    /// x86 AVX2 is available.
    pub avx2: bool,
    /// x86 AVX-512 is available (`detect` queries the `avx512f` feature).
    pub avx512: bool,
    /// ARM NEON is available (`detect` sets this on aarch64 targets only).
    pub neon: bool,
    /// The RISC-V vector extension is available (always `false` off riscv64).
    pub riscv_vector: bool,
    /// RISC-V vector length; `0` off riscv64.
    ///
    /// Presumably VLEN in bits, since the width helpers divide it by 32 and
    /// 64 to obtain lane counts — confirm against `riscv_vector`.
    pub riscv_vlen: usize,
}
145
146impl SimdCapabilities {
147 pub fn detect() -> Self {
149 Self {
150 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
151 sse: simd_feature_detected!("sse"),
152 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
153 sse2: simd_feature_detected!("sse2"),
154 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
155 sse3: simd_feature_detected!("sse3"),
156 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
157 ssse3: simd_feature_detected!("ssse3"),
158 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
159 sse41: simd_feature_detected!("sse4.1"),
160 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
161 sse42: simd_feature_detected!("sse4.2"),
162 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
163 avx: simd_feature_detected!("avx"),
164 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
165 avx2: simd_feature_detected!("avx2"),
166 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
167 avx512: simd_feature_detected!("avx512f"),
168 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
169 sse: false,
170 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
171 sse2: false,
172 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
173 sse3: false,
174 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
175 ssse3: false,
176 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
177 sse41: false,
178 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
179 sse42: false,
180 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
181 avx: false,
182 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
183 avx2: false,
184 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
185 avx512: false,
186 #[cfg(target_arch = "aarch64")]
187 neon: true,
188 #[cfg(not(target_arch = "aarch64"))]
189 neon: false,
190
191 #[cfg(target_arch = "riscv64")]
192 riscv_vector: {
193 #[cfg(target_arch = "riscv64")]
194 {
195 crate::riscv_vector::RiscVVectorCaps::detect().available
196 }
197 #[cfg(not(target_arch = "riscv64"))]
198 {
199 false
200 }
201 },
202 #[cfg(not(target_arch = "riscv64"))]
203 riscv_vector: false,
204
205 #[cfg(target_arch = "riscv64")]
206 riscv_vlen: {
207 #[cfg(target_arch = "riscv64")]
208 {
209 crate::riscv_vector::RiscVVectorCaps::detect().vlen
210 }
211 #[cfg(not(target_arch = "riscv64"))]
212 {
213 0
214 }
215 },
216 #[cfg(not(target_arch = "riscv64"))]
217 riscv_vlen: 0,
218 }
219 }
220
221 pub fn best_f32_width(&self) -> usize {
223 if self.avx512 {
224 16 } else if self.avx2 || self.avx {
226 8 } else if self.sse || self.neon {
228 4 } else if self.riscv_vector && self.riscv_vlen > 0 {
230 self.riscv_vlen / 32 } else {
232 1 }
234 }
235
236 pub fn best_f64_width(&self) -> usize {
238 if self.avx512 {
239 8 } else if self.avx2 || self.avx {
241 4 } else if self.sse2 || self.neon {
243 2 } else if self.riscv_vector && self.riscv_vlen > 0 {
245 self.riscv_vlen / 64 } else {
247 1 }
249 }
250
251 pub fn platform_name(&self) -> &'static str {
253 if self.avx512 {
254 "AVX-512"
255 } else if self.avx2 {
256 "AVX2"
257 } else if self.avx {
258 "AVX"
259 } else if self.sse42 {
260 "SSE4.2"
261 } else if self.sse41 {
262 "SSE4.1"
263 } else if self.ssse3 {
264 "SSSE3"
265 } else if self.sse3 {
266 "SSE3"
267 } else if self.sse2 {
268 "SSE2"
269 } else if self.sse {
270 "SSE"
271 } else if self.neon {
272 "NEON"
273 } else if self.riscv_vector {
274 "RISC-V Vector"
275 } else {
276 "Scalar"
277 }
278 }
279}
280
281pub static SIMD_CAPS: once_cell::sync::Lazy<SimdCapabilities> =
283 once_cell::sync::Lazy::new(SimdCapabilities::detect);
284
#[allow(non_snake_case)]
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::*;

    /// Smoke test: detection must run on the host and both width helpers
    /// must report at least one lane.
    #[test]
    fn test_simd_detection() {
        let detected = SimdCapabilities::detect();
        println!("SIMD Capabilities: {:?}", detected);

        let f32_lanes = detected.best_f32_width();
        let f64_lanes = detected.best_f64_width();
        assert!(f32_lanes >= 1);
        assert!(f64_lanes >= 1);
    }
}