ruvector_scipix/optimize/
mod.rs

1//! Performance optimization utilities for scipix OCR
2//!
3//! This module provides runtime feature detection and optimized code paths
4//! for different CPU architectures and capabilities.
5
6pub mod simd;
7pub mod parallel;
8pub mod memory;
9pub mod quantize;
10pub mod batch;
11
12use std::sync::OnceLock;
13
/// Snapshot of the SIMD instruction-set extensions detected at runtime.
///
/// The [`Default`] value has every flag cleared, which is the same value
/// [`detect_features`] reports on architectures it has no probe for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct CpuFeatures {
    /// AVX2 was detected (only probed on `x86_64`).
    pub avx2: bool,
    /// AVX-512 Foundation was detected (only probed on `x86_64`).
    pub avx512f: bool,
    /// NEON was detected (only probed on `aarch64`).
    pub neon: bool,
    /// SSE4.2 was detected (only probed on `x86_64`).
    pub sse4_2: bool,
}
22
/// Process-wide cache of the detected CPU features.
///
/// Filled in lazily by the first call to `detect_features`; a `OnceLock`
/// guarantees the value is initialized at most once.
static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
24
25/// Detect CPU features at runtime
26pub fn detect_features() -> CpuFeatures {
27    *CPU_FEATURES.get_or_init(|| {
28        #[cfg(target_arch = "x86_64")]
29        {
30            CpuFeatures {
31                avx2: is_x86_feature_detected!("avx2"),
32                avx512f: is_x86_feature_detected!("avx512f"),
33                neon: false,
34                sse4_2: is_x86_feature_detected!("sse4.2"),
35            }
36        }
37        #[cfg(target_arch = "aarch64")]
38        {
39            CpuFeatures {
40                avx2: false,
41                avx512f: false,
42                neon: std::arch::is_aarch64_feature_detected!("neon"),
43                sse4_2: false,
44            }
45        }
46        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
47        {
48            CpuFeatures {
49                avx2: false,
50                avx512f: false,
51                neon: false,
52                sse4_2: false,
53            }
54        }
55    })
56}
57
/// Get the detected CPU features.
///
/// Forwards directly to [`detect_features`], so the first call performs the
/// detection and later calls return the cached result.
pub fn get_features() -> CpuFeatures {
    detect_features()
}
62
63/// Runtime dispatch to optimized implementation
64pub trait OptimizedOp<T> {
65    /// Execute the operation with the best available implementation
66    fn execute(&self, input: T) -> T;
67
68    /// Execute with SIMD if available, fallback to scalar
69    fn execute_auto(&self, input: T) -> T {
70        let features = get_features();
71        if features.avx2 || features.avx512f || features.neon {
72            self.execute_simd(input)
73        } else {
74            self.execute_scalar(input)
75        }
76    }
77
78    /// SIMD implementation
79    fn execute_simd(&self, input: T) -> T;
80
81    /// Scalar fallback implementation
82    fn execute_scalar(&self, input: T) -> T;
83}
84
/// Optimization level configuration.
///
/// The [`Default`] level is [`OptLevel::Full`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OptLevel {
    /// No optimizations, scalar code only
    None,
    /// Use SIMD when available
    Simd,
    /// Use SIMD + parallel processing
    Parallel,
    /// All optimizations including memory optimizations
    #[default]
    Full,
}
103
/// Global optimization configuration.
///
/// Stored in a `OnceLock`, so it can be initialized only once per process:
/// either explicitly through `set_opt_level` or implicitly with the default
/// level the first time `get_opt_level` reads it.
static OPT_LEVEL: OnceLock<OptLevel> = OnceLock::new();
106
107/// Set the optimization level
108pub fn set_opt_level(level: OptLevel) {
109    OPT_LEVEL.set(level).ok();
110}
111
112/// Get the current optimization level
113pub fn get_opt_level() -> OptLevel {
114    *OPT_LEVEL.get_or_init(OptLevel::default)
115}
116
117/// Check if SIMD optimizations are enabled
118pub fn simd_enabled() -> bool {
119    matches!(get_opt_level(), OptLevel::Simd | OptLevel::Parallel | OptLevel::Full)
120}
121
122/// Check if parallel optimizations are enabled
123pub fn parallel_enabled() -> bool {
124    matches!(get_opt_level(), OptLevel::Parallel | OptLevel::Full)
125}
126
127/// Check if memory optimizations are enabled
128pub fn memory_opt_enabled() -> bool {
129    matches!(get_opt_level(), OptLevel::Full)
130}
131
#[cfg(test)]
mod tests {
    use super::*;

    /// Flatten the flags into a tuple so two snapshots can be compared
    /// without requiring `CpuFeatures` to implement `PartialEq`.
    fn flags(f: CpuFeatures) -> (bool, bool, bool, bool) {
        (f.avx2, f.avx512f, f.neon, f.sse4_2)
    }

    #[test]
    fn test_feature_detection() {
        let features = detect_features();
        println!("Detected features: {:?}", features);

        // Detection is cached, so both entry points must keep agreeing.
        assert_eq!(flags(features), flags(detect_features()));
        assert_eq!(flags(features), flags(get_features()));

        // Flags that belong to a different architecture can never be set.
        if !cfg!(target_arch = "x86_64") {
            assert!(!features.avx2 && !features.avx512f && !features.sse4_2);
        }
        if !cfg!(target_arch = "aarch64") {
            assert!(!features.neon);
        }
    }

    #[test]
    fn test_execute_auto_dispatch() {
        // Reports which code path was taken instead of doing real work.
        struct Probe;

        impl OptimizedOp<&'static str> for Probe {
            fn execute(&self, _input: &'static str) -> &'static str {
                "execute"
            }

            fn execute_simd(&self, _input: &'static str) -> &'static str {
                "simd"
            }

            fn execute_scalar(&self, _input: &'static str) -> &'static str {
                "scalar"
            }
        }

        let features = get_features();
        let expected = if features.avx2 || features.avx512f || features.neon {
            "simd"
        } else {
            "scalar"
        };
        assert_eq!(Probe.execute_auto(""), expected);
    }

    #[test]
    fn test_opt_level() {
        assert_eq!(get_opt_level(), OptLevel::Full);

        set_opt_level(OptLevel::Simd);
        // Can't change after first init, should still be Full
        assert_eq!(get_opt_level(), OptLevel::Full);
    }

    #[test]
    fn test_optimization_checks() {
        assert!(simd_enabled());
        assert!(parallel_enabled());
        assert!(memory_opt_enabled());
    }
}
163}