1#![allow(dead_code)]
2#![allow(non_snake_case)]
3#![allow(missing_docs)]
4#![allow(deprecated)]
5#![allow(unexpected_cfgs)]
6#![allow(clippy::all)]
7#![allow(clippy::pedantic)]
8#![allow(clippy::nursery)]
9#![allow(incomplete_features)]
19#![cfg_attr(feature = "no-std", no_std)]
21
22#[cfg(feature = "no-std")]
23extern crate alloc;
24
/// Stand-in for `println!` used when the crate is built with the `no-std` feature.
///
/// Accepts any token sequence and expands to an empty block, so existing
/// `println!(...)` call sites keep compiling but produce no output.
#[cfg(feature = "no-std")]
#[macro_export]
macro_rules! println {
    ($($tokens:tt)*) => {{}};
}
31
/// Stand-in for `eprintln!` used when the crate is built with the `no-std` feature.
///
/// Accepts any token sequence and expands to an empty block, so existing
/// `eprintln!(...)` call sites keep compiling but produce no output.
#[cfg(feature = "no-std")]
#[macro_export]
macro_rules! eprintln {
    ($($tokens:tt)*) => {{}};
}
37
/// Runtime CPU-feature probe for x86 / x86_64 builds that link `std`.
///
/// Forwards the feature token (e.g. `"avx2"`) to
/// `std::arch::is_x86_feature_detected!` and evaluates to its `bool` result.
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    not(feature = "no-std")
))]
#[macro_export]
macro_rules! simd_feature_detected {
    ($name:tt) => {
        std::arch::is_x86_feature_detected!($name)
    };
}
51
/// Fallback CPU-feature probe for x86 / x86_64 builds with the `no-std` feature.
///
/// Accepts the same single feature token as the `std` variant but always
/// evaluates to `false`; no detection is performed in this configuration.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "no-std"))]
#[macro_export]
#[allow(unused_macros)]
macro_rules! simd_feature_detected {
    ($name:tt) => {
        false
    };
}
60
/// Fallback CPU-feature probe for every target that is not x86 / x86_64.
///
/// Accepts a single feature token and always evaluates to `false`, since the
/// queried features are x86-specific.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
#[macro_export]
#[allow(unused_macros)]
macro_rules! simd_feature_detected {
    ($name:tt) => {
        false
    };
}
69
70pub mod activation;
73pub mod adaptive_optimization;
74pub mod advanced_optimizations;
75pub mod allocator;
76pub mod approximate;
77pub mod audio_processing;
78pub mod batch_operations;
79pub mod benchmark_framework;
80pub mod bit_operations;
81pub mod clustering;
82pub mod comprehensive_benchmarks;
83pub mod compression;
84pub mod custom_accelerator;
85pub mod distance;
86pub mod distributions;
87pub mod energy_benchmarks;
88pub mod error_correction;
89pub mod external_integration;
90pub mod fluent;
91pub mod fpga;
92pub mod gpu;
93pub mod gpu_memory;
94pub mod half_precision;
95pub mod image_processing;
96pub mod intrinsics;
97pub mod kernels;
98pub mod loss;
99pub mod matrix;
100pub mod memory;
101pub mod middleware;
102pub mod multi_gpu;
103pub mod neuromorphic;
104#[cfg(feature = "no-std")]
105pub mod no_std;
106pub mod optimization;
107pub mod optimization_hints;
108pub mod performance_hooks;
109pub mod performance_monitor;
110pub mod plugin_architecture;
111pub mod profiling;
112pub mod quantum;
113pub mod reduction;
114pub mod regression;
115#[cfg(target_arch = "riscv64")]
116pub mod riscv_vector;
117pub mod safe_simd;
118pub mod safety;
119pub mod search;
120pub mod signal_processing;
121pub mod sorting;
122pub mod target;
123pub mod tpu;
124pub mod traits;
125pub mod validation;
126pub mod vector;
127
128pub use clustering::LinkageType;
130
/// Snapshot of the SIMD instruction-set support relevant to this crate.
///
/// Every field is plain data, so the type is `Copy` and can be compared,
/// hashed, and constructed directly. `Default` yields the all-`false`,
/// `riscv_vlen == 0` value, i.e. a target with no SIMD support at all; use
/// `SimdCapabilities::detect()` to probe the actual machine.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct SimdCapabilities {
    /// x86 SSE is available.
    pub sse: bool,
    /// x86 SSE2 is available.
    pub sse2: bool,
    /// x86 SSE3 is available.
    pub sse3: bool,
    /// x86 SSSE3 is available.
    pub ssse3: bool,
    /// x86 SSE4.1 is available.
    pub sse41: bool,
    /// x86 SSE4.2 is available.
    pub sse42: bool,
    /// x86 AVX is available.
    pub avx: bool,
    /// x86 AVX2 is available.
    pub avx2: bool,
    /// x86 AVX-512 is available (`detect()` fills this from the `avx512f` feature).
    pub avx512: bool,
    /// ARM NEON is available.
    pub neon: bool,
    /// The RISC-V vector extension is available.
    pub riscv_vector: bool,
    /// RISC-V vector register length; `0` when unknown or not applicable.
    /// NOTE(review): presumably measured in bits — confirm against `riscv_vector`.
    pub riscv_vlen: usize,
}
147
148impl SimdCapabilities {
149 pub fn detect() -> Self {
151 Self {
152 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
153 sse: simd_feature_detected!("sse"),
154 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
155 sse2: simd_feature_detected!("sse2"),
156 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
157 sse3: simd_feature_detected!("sse3"),
158 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
159 ssse3: simd_feature_detected!("ssse3"),
160 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
161 sse41: simd_feature_detected!("sse4.1"),
162 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
163 sse42: simd_feature_detected!("sse4.2"),
164 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
165 avx: simd_feature_detected!("avx"),
166 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
167 avx2: simd_feature_detected!("avx2"),
168 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
169 avx512: simd_feature_detected!("avx512f"),
170 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
171 sse: false,
172 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
173 sse2: false,
174 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
175 sse3: false,
176 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
177 ssse3: false,
178 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
179 sse41: false,
180 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
181 sse42: false,
182 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
183 avx: false,
184 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
185 avx2: false,
186 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
187 avx512: false,
188 #[cfg(target_arch = "aarch64")]
189 neon: true,
190 #[cfg(not(target_arch = "aarch64"))]
191 neon: false,
192
193 #[cfg(target_arch = "riscv64")]
194 riscv_vector: {
195 #[cfg(target_arch = "riscv64")]
196 {
197 crate::riscv_vector::RiscVVectorCaps::detect().available
198 }
199 #[cfg(not(target_arch = "riscv64"))]
200 {
201 false
202 }
203 },
204 #[cfg(not(target_arch = "riscv64"))]
205 riscv_vector: false,
206
207 #[cfg(target_arch = "riscv64")]
208 riscv_vlen: {
209 #[cfg(target_arch = "riscv64")]
210 {
211 crate::riscv_vector::RiscVVectorCaps::detect().vlen
212 }
213 #[cfg(not(target_arch = "riscv64"))]
214 {
215 0
216 }
217 },
218 #[cfg(not(target_arch = "riscv64"))]
219 riscv_vlen: 0,
220 }
221 }
222
223 pub fn best_f32_width(&self) -> usize {
225 if self.avx512 {
226 16 } else if self.avx2 || self.avx {
228 8 } else if self.sse || self.neon {
230 4 } else if self.riscv_vector && self.riscv_vlen > 0 {
232 self.riscv_vlen / 32 } else {
234 1 }
236 }
237
238 pub fn best_f64_width(&self) -> usize {
240 if self.avx512 {
241 8 } else if self.avx2 || self.avx {
243 4 } else if self.sse2 || self.neon {
245 2 } else if self.riscv_vector && self.riscv_vlen > 0 {
247 self.riscv_vlen / 64 } else {
249 1 }
251 }
252
253 pub fn platform_name(&self) -> &'static str {
255 if self.avx512 {
256 "AVX-512"
257 } else if self.avx2 {
258 "AVX2"
259 } else if self.avx {
260 "AVX"
261 } else if self.sse42 {
262 "SSE4.2"
263 } else if self.sse41 {
264 "SSE4.1"
265 } else if self.ssse3 {
266 "SSSE3"
267 } else if self.sse3 {
268 "SSE3"
269 } else if self.sse2 {
270 "SSE2"
271 } else if self.sse {
272 "SSE"
273 } else if self.neon {
274 "NEON"
275 } else if self.riscv_vector {
276 "RISC-V Vector"
277 } else {
278 "Scalar"
279 }
280 }
281}
282
283pub static SIMD_CAPS: once_cell::sync::Lazy<SimdCapabilities> =
285 once_cell::sync::Lazy::new(SimdCapabilities::detect);
286
#[allow(non_snake_case)]
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::*;

    /// Smoke test: detection runs on the host and both lane-width helpers
    /// report at least one lane.
    #[test]
    fn test_simd_detection() {
        let detected = SimdCapabilities::detect();
        println!("SIMD Capabilities: {:?}", detected);

        let f32_lanes = detected.best_f32_width();
        assert!(f32_lanes >= 1);

        let f64_lanes = detected.best_f64_width();
        assert!(f64_lanes >= 1);
    }
}