Skip to main content

sklears_simd/
lib.rs

1#![allow(dead_code)]
2#![allow(non_snake_case)]
3#![allow(missing_docs)]
4#![allow(deprecated)]
5#![allow(unexpected_cfgs)]
6#![allow(clippy::all)]
7#![allow(clippy::pedantic)]
8#![allow(clippy::nursery)]
9//! SIMD-optimized operations for sklears
10//!
11//! This crate provides SIMD-accelerated implementations of common machine learning operations.
12//!
13//! ## SciRS2 Policy Compliance
14//! ✅ SIMD operations delegated to scirs2-core's backend
15//! ✅ Works on stable Rust (no nightly features required)
16//! ✅ Platform-specific optimizations handled by ndarray/BLAS
17
18#![allow(incomplete_features)]
19// Note: no-std feature is temporarily disabled until implementation is complete
20#![cfg_attr(feature = "no-std", no_std)]
21
22#[cfg(feature = "no-std")]
23extern crate alloc;
24
25// No-std compatible print macros (no-op for tests)
/// No-op stand-in for `println!` in `no-std` builds.
///
/// Accepts any token sequence and expands to the unit value; the arguments
/// are discarded and never evaluated.
#[cfg(feature = "no-std")]
#[macro_export]
macro_rules! println {
    ($($ignored:tt)*) => {
        ()
    };
}
31
/// No-op stand-in for `eprintln!` in `no-std` builds.
///
/// Accepts any token sequence and expands to the unit value; the arguments
/// are discarded and never evaluated.
#[cfg(feature = "no-std")]
#[macro_export]
macro_rules! eprintln {
    ($($ignored:tt)*) => {
        ()
    };
}
37
// Conditional SIMD feature detection macro.
//
// Exactly one of the two definitions below is compiled:
// * x86 / x86_64 with std available: forwards to `is_x86_feature_detected!`.
// * every other configuration (non-x86 target, or the `no-std` feature):
//   expands to `false`, so callers take their scalar fallback.
// The second cfg predicate is the literal negation of the first, so the two
// definitions can neither overlap nor leave a configuration uncovered.

/// Runtime check for an x86 CPU feature, e.g. `simd_feature_detected!("avx2")`.
///
/// Forwards its single token to `std::arch::is_x86_feature_detected!`. The
/// path is written with a leading `::` so the expansion still resolves when
/// the calling module has its own item named `std`.
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    not(feature = "no-std")
))]
#[macro_export]
macro_rules! simd_feature_detected {
    ($feature:tt) => {
        ::std::arch::is_x86_feature_detected!($feature)
    };
}

/// Fallback for configurations without x86 runtime detection.
///
/// Accepts the same single-token argument as the detecting variant and always
/// expands to `false`.
#[cfg(not(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    not(feature = "no-std")
)))]
#[macro_export]
#[allow(unused_macros)]
macro_rules! simd_feature_detected {
    ($feature:tt) => {
        false
    };
}
69
// Crate modules; the macros defined above are in textual scope for all of them.
71
72pub mod activation;
73pub mod adaptive_optimization;
74pub mod advanced_optimizations;
75pub mod allocator;
76pub mod approximate;
77pub mod audio_processing;
78pub mod batch_operations;
79pub mod benchmark_framework;
80pub mod bit_operations;
81pub mod clustering;
82pub mod comprehensive_benchmarks;
83pub mod compression;
84pub mod custom_accelerator;
85pub mod distance;
86pub mod distributions;
87pub mod energy_benchmarks;
88pub mod error_correction;
89pub mod external_integration;
90pub mod fluent;
91pub mod fpga;
92pub mod gpu;
93pub mod gpu_memory;
94pub mod half_precision;
95pub mod image_processing;
96pub mod intrinsics;
97pub mod kernels;
98pub mod loss;
99pub mod matrix;
100pub mod memory;
101pub mod middleware;
102pub mod multi_gpu;
103pub mod neuromorphic;
104#[cfg(feature = "no-std")]
105pub mod no_std;
106pub mod optimization;
107pub mod optimization_hints;
108pub mod performance_hooks;
109pub mod performance_monitor;
110pub mod plugin_architecture;
111pub mod profiling;
112pub mod quantum;
113pub mod reduction;
114pub mod regression;
115#[cfg(target_arch = "riscv64")]
116pub mod riscv_vector;
117pub mod safe_simd;
118pub mod safety;
119pub mod search;
120pub mod signal_processing;
121pub mod sorting;
122pub mod target;
123pub mod tpu;
124pub mod traits;
125pub mod validation;
126pub mod vector;
127
128// Re-export key types and functions
129pub use clustering::LinkageType;
130
131/// Platform-specific SIMD capabilities
132#[derive(Debug, Clone, Copy)]
133pub struct SimdCapabilities {
134    pub sse: bool,
135    pub sse2: bool,
136    pub sse3: bool,
137    pub ssse3: bool,
138    pub sse41: bool,
139    pub sse42: bool,
140    pub avx: bool,
141    pub avx2: bool,
142    pub avx512: bool,
143    pub neon: bool,
144    pub riscv_vector: bool,
145    pub riscv_vlen: usize,
146}
147
148impl SimdCapabilities {
149    /// Detect available SIMD instructions on the current platform
150    pub fn detect() -> Self {
151        Self {
152            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
153            sse: simd_feature_detected!("sse"),
154            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
155            sse2: simd_feature_detected!("sse2"),
156            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
157            sse3: simd_feature_detected!("sse3"),
158            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
159            ssse3: simd_feature_detected!("ssse3"),
160            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
161            sse41: simd_feature_detected!("sse4.1"),
162            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
163            sse42: simd_feature_detected!("sse4.2"),
164            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
165            avx: simd_feature_detected!("avx"),
166            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
167            avx2: simd_feature_detected!("avx2"),
168            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
169            avx512: simd_feature_detected!("avx512f"),
170            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
171            sse: false,
172            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
173            sse2: false,
174            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
175            sse3: false,
176            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
177            ssse3: false,
178            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
179            sse41: false,
180            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
181            sse42: false,
182            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
183            avx: false,
184            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
185            avx2: false,
186            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
187            avx512: false,
188            #[cfg(target_arch = "aarch64")]
189            neon: true,
190            #[cfg(not(target_arch = "aarch64"))]
191            neon: false,
192
193            #[cfg(target_arch = "riscv64")]
194            riscv_vector: {
195                #[cfg(target_arch = "riscv64")]
196                {
197                    crate::riscv_vector::RiscVVectorCaps::detect().available
198                }
199                #[cfg(not(target_arch = "riscv64"))]
200                {
201                    false
202                }
203            },
204            #[cfg(not(target_arch = "riscv64"))]
205            riscv_vector: false,
206
207            #[cfg(target_arch = "riscv64")]
208            riscv_vlen: {
209                #[cfg(target_arch = "riscv64")]
210                {
211                    crate::riscv_vector::RiscVVectorCaps::detect().vlen
212                }
213                #[cfg(not(target_arch = "riscv64"))]
214                {
215                    0
216                }
217            },
218            #[cfg(not(target_arch = "riscv64"))]
219            riscv_vlen: 0,
220        }
221    }
222
223    /// Get the best available SIMD width for f32 operations
224    pub fn best_f32_width(&self) -> usize {
225        if self.avx512 {
226            16 // 512 bits / 32 bits
227        } else if self.avx2 || self.avx {
228            8 // 256 bits / 32 bits
229        } else if self.sse || self.neon {
230            4 // 128 bits / 32 bits
231        } else if self.riscv_vector && self.riscv_vlen > 0 {
232            self.riscv_vlen / 32 // VLEN bits / 32 bits per f32
233        } else {
234            1 // Scalar fallback
235        }
236    }
237
238    /// Get the best available SIMD width for f64 operations
239    pub fn best_f64_width(&self) -> usize {
240        if self.avx512 {
241            8 // 512 bits / 64 bits
242        } else if self.avx2 || self.avx {
243            4 // 256 bits / 64 bits
244        } else if self.sse2 || self.neon {
245            2 // 128 bits / 64 bits
246        } else if self.riscv_vector && self.riscv_vlen > 0 {
247            self.riscv_vlen / 64 // VLEN bits / 64 bits per f64
248        } else {
249            1 // Scalar fallback
250        }
251    }
252
253    /// Get the platform name for current SIMD capabilities
254    pub fn platform_name(&self) -> &'static str {
255        if self.avx512 {
256            "AVX-512"
257        } else if self.avx2 {
258            "AVX2"
259        } else if self.avx {
260            "AVX"
261        } else if self.sse42 {
262            "SSE4.2"
263        } else if self.sse41 {
264            "SSE4.1"
265        } else if self.ssse3 {
266            "SSSE3"
267        } else if self.sse3 {
268            "SSE3"
269        } else if self.sse2 {
270            "SSE2"
271        } else if self.sse {
272            "SSE"
273        } else if self.neon {
274            "NEON"
275        } else if self.riscv_vector {
276            "RISC-V Vector"
277        } else {
278            "Scalar"
279        }
280    }
281}
282
283/// Global SIMD capabilities detection
284pub static SIMD_CAPS: once_cell::sync::Lazy<SimdCapabilities> =
285    once_cell::sync::Lazy::new(SimdCapabilities::detect);
286
#[allow(non_snake_case)]
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::SimdCapabilities;

    /// Smoke test: detection runs on the host and reports usable lane counts.
    #[test]
    fn test_simd_detection() {
        let detected = SimdCapabilities::detect();
        println!("SIMD Capabilities: {:?}", detected);

        // Even the scalar fallback counts as one lane, so neither width may
        // drop below 1.
        assert!(detected.best_f32_width() >= 1);
        assert!(detected.best_f64_width() >= 1);
    }
}