ruvector_scipix/optimize/
mod.rs1pub mod simd;
7pub mod parallel;
8pub mod memory;
9pub mod quantize;
10pub mod batch;
11
12use std::sync::OnceLock;
13
/// Snapshot of the CPU instruction-set extensions this module dispatches on.
///
/// Values are produced by [`detect_features`]. The `Default` value has every
/// flag cleared, i.e. "no extension available".
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct CpuFeatures {
    /// x86 AVX2 was detected.
    pub avx2: bool,
    /// x86 AVX-512 Foundation (`avx512f`) was detected.
    pub avx512f: bool,
    /// AArch64 NEON was detected.
    pub neon: bool,
    /// x86 SSE4.2 was detected.
    pub sse4_2: bool,
}
22
23static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
24
25pub fn detect_features() -> CpuFeatures {
27 *CPU_FEATURES.get_or_init(|| {
28 #[cfg(target_arch = "x86_64")]
29 {
30 CpuFeatures {
31 avx2: is_x86_feature_detected!("avx2"),
32 avx512f: is_x86_feature_detected!("avx512f"),
33 neon: false,
34 sse4_2: is_x86_feature_detected!("sse4.2"),
35 }
36 }
37 #[cfg(target_arch = "aarch64")]
38 {
39 CpuFeatures {
40 avx2: false,
41 avx512f: false,
42 neon: std::arch::is_aarch64_feature_detected!("neon"),
43 sse4_2: false,
44 }
45 }
46 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
47 {
48 CpuFeatures {
49 avx2: false,
50 avx512f: false,
51 neon: false,
52 sse4_2: false,
53 }
54 }
55 })
56}
57
58pub fn get_features() -> CpuFeatures {
60 detect_features()
61}
62
63pub trait OptimizedOp<T> {
65 fn execute(&self, input: T) -> T;
67
68 fn execute_auto(&self, input: T) -> T {
70 let features = get_features();
71 if features.avx2 || features.avx512f || features.neon {
72 self.execute_simd(input)
73 } else {
74 self.execute_scalar(input)
75 }
76 }
77
78 fn execute_simd(&self, input: T) -> T;
80
81 fn execute_scalar(&self, input: T) -> T;
83}
84
/// Global optimization level.
///
/// Each level enables everything the previous one does plus one more class
/// of optimization. The default is [`OptLevel::Full`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OptLevel {
    /// No optimizations enabled.
    None,
    /// SIMD optimizations only.
    Simd,
    /// SIMD and parallel optimizations.
    Parallel,
    /// SIMD, parallel and memory optimizations.
    #[default]
    Full,
}
103
104static OPT_LEVEL: OnceLock<OptLevel> = OnceLock::new();
106
107pub fn set_opt_level(level: OptLevel) {
109 OPT_LEVEL.set(level).ok();
110}
111
112pub fn get_opt_level() -> OptLevel {
114 *OPT_LEVEL.get_or_init(OptLevel::default)
115}
116
117pub fn simd_enabled() -> bool {
119 matches!(get_opt_level(), OptLevel::Simd | OptLevel::Parallel | OptLevel::Full)
120}
121
122pub fn parallel_enabled() -> bool {
124 matches!(get_opt_level(), OptLevel::Parallel | OptLevel::Full)
125}
126
127pub fn memory_opt_enabled() -> bool {
129 matches!(get_opt_level(), OptLevel::Full)
130}
131
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection never mixes x86 and ARM flags, is stable across calls, and
    /// agrees with the runtime detection macros of the current target.
    #[test]
    fn test_feature_detection() {
        let features = detect_features();
        println!("Detected features: {:?}", features);

        // A single CPU cannot expose both x86 and ARM extensions.
        let any_x86 = features.avx2 || features.avx512f || features.sse4_2;
        assert!(
            !(features.neon && any_x86),
            "x86 and ARM extensions reported together: {:?}",
            features
        );

        // The result is cached, so both accessors must agree on every flag.
        let cached = get_features();
        assert_eq!(
            (features.avx2, features.avx512f, features.neon, features.sse4_2),
            (cached.avx2, cached.avx512f, cached.neon, cached.sse4_2)
        );

        #[cfg(target_arch = "x86_64")]
        {
            assert_eq!(features.avx2, is_x86_feature_detected!("avx2"));
            assert_eq!(features.avx512f, is_x86_feature_detected!("avx512f"));
            assert_eq!(features.sse4_2, is_x86_feature_detected!("sse4.2"));
            assert!(!features.neon);
        }

        #[cfg(target_arch = "aarch64")]
        {
            assert_eq!(
                features.neon,
                std::arch::is_aarch64_feature_detected!("neon")
            );
            assert!(!any_x86);
        }
    }

    /// The level is write-once: after the default has been read, a later
    /// `set_opt_level` call must not change it.
    #[test]
    fn test_opt_level() {
        assert_eq!(get_opt_level(), OptLevel::Full);

        set_opt_level(OptLevel::Simd);
        assert_eq!(get_opt_level(), OptLevel::Full);
    }

    /// With the default level (`Full`) every optimization class is enabled.
    #[test]
    fn test_optimization_checks() {
        assert!(simd_enabled());
        assert!(parallel_enabled());
        assert!(memory_opt_enabled());
    }
}