// sklears_simd/lib.rs

1//! SIMD-optimized operations for sklears
2//!
3//! This crate provides SIMD-accelerated implementations of common machine learning operations.
4//!
5//! ## SciRS2 Policy Compliance
6//! - SIMD operations delegated to scirs2-core's backend
7//! - Works on stable Rust (no nightly features required)
8//! - Platform-specific optimizations handled by ndarray/BLAS
9
10// Note: no-std feature is temporarily disabled until implementation is complete
11#![cfg_attr(feature = "no-std", no_std)]
12// The simd_feature_detected! macro wraps is_x86_feature_detected! with different
13// feature strings. Clippy cannot distinguish these macro invocations and fires
14// false-positive "same condition" / "simplified bool" lints. These are intentional
15// SIMD dispatch patterns and the lints are suppressed at crate level.
16#![allow(clippy::ifs_same_cond)]
17#![allow(clippy::nonminimal_bool)]
18#![allow(clippy::eq_op)]
19
20#[cfg(feature = "no-std")]
21extern crate alloc;
22
23// No-std compatible print macros (no-op for tests)
/// No-op stand-in for `println!` in builds with the `no-std` feature enabled.
///
/// Any token sequence is accepted and the expansion is an empty block, so the
/// arguments are discarded without being evaluated.
#[cfg(feature = "no-std")]
#[macro_export]
macro_rules! println {
    ($($tokens:tt)*) => {
        {}
    };
}
29
/// No-op stand-in for `eprintln!` in builds with the `no-std` feature enabled.
///
/// Any token sequence is accepted and the expansion is an empty block, so the
/// arguments are discarded without being evaluated.
#[cfg(feature = "no-std")]
#[macro_export]
macro_rules! eprintln {
    ($($tokens:tt)*) => {
        {}
    };
}
35
36// Conditional SIMD feature detection macro
37// In no-std mode, always return false (use scalar fallback)
38// In std mode, use the actual is_x86_feature_detected! macro
/// Runtime CPU-feature check for x86/x86_64 builds that link `std`.
///
/// Takes a single feature-name token (for example `"avx2"`) and forwards it to
/// `std::arch::is_x86_feature_detected!`, evaluating to that macro's `bool` result.
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    not(feature = "no-std")
))]
#[macro_export]
macro_rules! simd_feature_detected {
    ($feature:tt) => {
        // `macro_rules!` paths are resolved at the call site. The leading `::`
        // anchors this one to the real `std` crate so that a local item named
        // `std` in the invoking module cannot capture it.
        ::std::arch::is_x86_feature_detected!($feature)
    };
}
49
/// Feature check for x86/x86_64 builds compiled with the `no-std` feature.
///
/// No detection is performed in this configuration: whatever feature token is
/// passed, the macro evaluates to `false`.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "no-std"))]
#[macro_export]
#[allow(unused_macros)]
macro_rules! simd_feature_detected {
    ($ignored:tt) => {
        false
    };
}
58
/// Feature check for targets that are neither `x86` nor `x86_64`.
///
/// The x86 feature names do not apply on these targets, so the macro accepts
/// any single token and evaluates to `false`.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
#[macro_export]
#[allow(unused_macros)]
macro_rules! simd_feature_detected {
    ($ignored:tt) => {
        false
    };
}
67
// The macros above are declared before the modules below, so they are in scope inside every submodule.
69
70pub mod activation;
71pub mod adaptive_optimization;
72pub mod advanced_optimizations;
73pub mod allocator;
74pub mod approximate;
75pub mod audio_processing;
76pub mod batch_operations;
77pub mod benchmark_framework;
78pub mod bit_operations;
79pub mod clustering;
80pub mod comprehensive_benchmarks;
81pub mod compression;
82pub mod custom_accelerator;
83pub mod distance;
84pub mod distributions;
85pub mod energy_benchmarks;
86pub mod error_correction;
87pub mod external_integration;
88pub mod fluent;
89pub mod fpga;
90pub mod gpu;
91pub mod gpu_memory;
92pub mod half_precision;
93pub mod image_processing;
94pub mod intrinsics;
95pub mod kernels;
96pub mod loss;
97pub mod matrix;
98pub mod memory;
99pub mod middleware;
100pub mod multi_gpu;
101pub mod neuromorphic;
102#[cfg(feature = "no-std")]
103pub mod no_std;
104pub mod optimization;
105pub mod optimization_hints;
106pub mod performance_hooks;
107pub mod performance_monitor;
108pub mod plugin_architecture;
109pub mod profiling;
110pub mod quantum;
111pub mod reduction;
112pub mod regression;
113#[cfg(target_arch = "riscv64")]
114pub mod riscv_vector;
115pub mod safe_simd;
116pub mod safety;
117pub mod search;
118pub mod signal_processing;
119pub mod sorting;
120pub mod target;
121pub mod tpu;
122pub mod traits;
123pub mod validation;
124pub mod vector;
125
126// Re-export key types and functions
127pub use clustering::LinkageType;
128
129/// Platform-specific SIMD capabilities
130#[derive(Debug, Clone, Copy)]
131pub struct SimdCapabilities {
132    pub sse: bool,
133    pub sse2: bool,
134    pub sse3: bool,
135    pub ssse3: bool,
136    pub sse41: bool,
137    pub sse42: bool,
138    pub avx: bool,
139    pub avx2: bool,
140    pub avx512: bool,
141    pub neon: bool,
142    pub riscv_vector: bool,
143    pub riscv_vlen: usize,
144}
145
146impl SimdCapabilities {
147    /// Detect available SIMD instructions on the current platform
148    pub fn detect() -> Self {
149        Self {
150            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
151            sse: simd_feature_detected!("sse"),
152            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
153            sse2: simd_feature_detected!("sse2"),
154            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
155            sse3: simd_feature_detected!("sse3"),
156            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
157            ssse3: simd_feature_detected!("ssse3"),
158            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
159            sse41: simd_feature_detected!("sse4.1"),
160            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
161            sse42: simd_feature_detected!("sse4.2"),
162            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
163            avx: simd_feature_detected!("avx"),
164            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
165            avx2: simd_feature_detected!("avx2"),
166            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
167            avx512: simd_feature_detected!("avx512f"),
168            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
169            sse: false,
170            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
171            sse2: false,
172            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
173            sse3: false,
174            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
175            ssse3: false,
176            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
177            sse41: false,
178            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
179            sse42: false,
180            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
181            avx: false,
182            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
183            avx2: false,
184            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
185            avx512: false,
186            #[cfg(target_arch = "aarch64")]
187            neon: true,
188            #[cfg(not(target_arch = "aarch64"))]
189            neon: false,
190
191            #[cfg(target_arch = "riscv64")]
192            riscv_vector: {
193                #[cfg(target_arch = "riscv64")]
194                {
195                    crate::riscv_vector::RiscVVectorCaps::detect().available
196                }
197                #[cfg(not(target_arch = "riscv64"))]
198                {
199                    false
200                }
201            },
202            #[cfg(not(target_arch = "riscv64"))]
203            riscv_vector: false,
204
205            #[cfg(target_arch = "riscv64")]
206            riscv_vlen: {
207                #[cfg(target_arch = "riscv64")]
208                {
209                    crate::riscv_vector::RiscVVectorCaps::detect().vlen
210                }
211                #[cfg(not(target_arch = "riscv64"))]
212                {
213                    0
214                }
215            },
216            #[cfg(not(target_arch = "riscv64"))]
217            riscv_vlen: 0,
218        }
219    }
220
221    /// Get the best available SIMD width for f32 operations
222    pub fn best_f32_width(&self) -> usize {
223        if self.avx512 {
224            16 // 512 bits / 32 bits
225        } else if self.avx2 || self.avx {
226            8 // 256 bits / 32 bits
227        } else if self.sse || self.neon {
228            4 // 128 bits / 32 bits
229        } else if self.riscv_vector && self.riscv_vlen > 0 {
230            self.riscv_vlen / 32 // VLEN bits / 32 bits per f32
231        } else {
232            1 // Scalar fallback
233        }
234    }
235
236    /// Get the best available SIMD width for f64 operations
237    pub fn best_f64_width(&self) -> usize {
238        if self.avx512 {
239            8 // 512 bits / 64 bits
240        } else if self.avx2 || self.avx {
241            4 // 256 bits / 64 bits
242        } else if self.sse2 || self.neon {
243            2 // 128 bits / 64 bits
244        } else if self.riscv_vector && self.riscv_vlen > 0 {
245            self.riscv_vlen / 64 // VLEN bits / 64 bits per f64
246        } else {
247            1 // Scalar fallback
248        }
249    }
250
251    /// Get the platform name for current SIMD capabilities
252    pub fn platform_name(&self) -> &'static str {
253        if self.avx512 {
254            "AVX-512"
255        } else if self.avx2 {
256            "AVX2"
257        } else if self.avx {
258            "AVX"
259        } else if self.sse42 {
260            "SSE4.2"
261        } else if self.sse41 {
262            "SSE4.1"
263        } else if self.ssse3 {
264            "SSSE3"
265        } else if self.sse3 {
266            "SSE3"
267        } else if self.sse2 {
268            "SSE2"
269        } else if self.sse {
270            "SSE"
271        } else if self.neon {
272            "NEON"
273        } else if self.riscv_vector {
274            "RISC-V Vector"
275        } else {
276            "Scalar"
277        }
278    }
279}
280
281/// Global SIMD capabilities detection
282pub static SIMD_CAPS: once_cell::sync::Lazy<SimdCapabilities> =
283    once_cell::sync::Lazy::new(SimdCapabilities::detect);
284
#[allow(non_snake_case)]
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::*;

    /// Smoke test: detection runs on the host and both reported lane counts
    /// are at least the scalar fallback of 1.
    #[test]
    fn test_simd_detection() {
        let detected = SimdCapabilities::detect();
        println!("SIMD Capabilities: {:?}", detected);

        // At least one width should be available
        let widths = [detected.best_f32_width(), detected.best_f64_width()];
        assert!(widths.iter().all(|&lanes| lanes >= 1));
    }
}