mathhook_core/core/performance/
config.rs

1//! Global performance configuration management
2//!
3//! This module provides a global configuration system that allows:
4//! 1. Binding crates to set their optimal configuration once
5//! 2. Core operations to use the global config by default
6//! 3. Explicit overrides when needed for specific operations
7
8use super::simd::{SimdOps, SimdOptimized};
9use super::strategy::{BindingContext, PerformanceConfig};
10use crate::core::Expression;
11use num_traits::ToPrimitive;
12use rayon::prelude::*;
13use std::collections::HashMap;
14use std::sync::{Arc, OnceLock, RwLock};
15
/// Global performance configuration instance
///
/// NOTE(review): the `Arc` layer appears redundant — the `OnceLock` static
/// already yields a `'static` reference, and nothing in this module clones
/// the `Arc` handle. `OnceLock<RwLock<PerformanceConfig>>` would likely
/// suffice; confirm no external code relies on cloning before simplifying.
static GLOBAL_CONFIG: OnceLock<Arc<RwLock<PerformanceConfig>>> = OnceLock::new();

/// Initialize global configuration (called automatically)
///
/// Lazily initializes the global config with `PerformanceConfig::default()`
/// on first access; subsequent calls return the same `'static` handle.
fn ensure_global_config() -> &'static Arc<RwLock<PerformanceConfig>> {
    GLOBAL_CONFIG.get_or_init(|| Arc::new(RwLock::new(PerformanceConfig::default())))
}
23
24/// Get the current global performance configuration
25pub fn get_global_config() -> PerformanceConfig {
26    let config_lock = ensure_global_config();
27    config_lock
28        .read()
29        .expect("BUG: Global performance config lock poisoned - indicates panic during config read in another thread")
30        .clone()
31}
32
33/// Set the global performance configuration
34///
35/// This is typically called once by binding crates during initialization:
36///
37/// ```ignore
38/// // In mathhook-python/src/lib.rs
39/// use mathhook_core::core::performance::config::set_global_config;
40/// use mathhook_core::core::performance::strategy::{PerformanceConfig, BindingContext};
41///
42/// #[pymodule]
43/// fn mathhook_python(_py: Python, m: &PyModule) -> PyResult<()> {
44///     // Set Python-optimized configuration globally
45///     let config = PerformanceConfig::for_binding(BindingContext::Python);
46///     set_global_config(config);
47///     Ok(())
48/// }
49/// ```
50pub fn set_global_config(config: PerformanceConfig) {
51    let config_lock = ensure_global_config();
52    *config_lock
53        .write()
54        .expect("BUG: Global performance config lock poisoned - indicates panic during config write in another thread") = config;
55}
56
57/// Set global configuration for a specific binding context
58///
59/// Convenience method for binding crates:
60///
61/// ```ignore
62/// // In mathhook-node/src/lib.rs
63/// use mathhook_core::core::performance::config::set_binding_config;
64/// use mathhook_core::core::performance::strategy::BindingContext;
65///
66/// fn init_mathhook_node() {
67///     set_binding_config(BindingContext::NodeJs);
68/// }
69/// ```
70pub fn set_binding_config(context: BindingContext) {
71    let config = PerformanceConfig::for_binding(context);
72    set_global_config(config);
73}
74
75/// Update specific configuration parameters without replacing the entire config
76///
77/// Useful for runtime tuning:
78///
79/// ```
80/// use mathhook_core::core::performance::config::update_global_config;
81///
82/// // Disable parallelism at runtime
83/// update_global_config(|config| {
84///     config.parallel_enabled = false;
85/// });
86/// ```
87pub fn update_global_config<F>(updater: F)
88where
89    F: FnOnce(&mut PerformanceConfig),
90{
91    let config_lock = ensure_global_config();
92    if let Ok(mut config) = config_lock.write() {
93        updater(&mut config);
94    }
95}
96
/// Get configuration statistics for monitoring
///
/// Copies the relevant fields of the current global config into a plain-data
/// [`ConfigInfo`] snapshot; later config changes are not reflected in the
/// returned value.
pub fn get_config_info() -> ConfigInfo {
    let config = get_global_config();
    ConfigInfo {
        simd_enabled: config.simd_enabled,
        simd_threshold: config.simd_threshold,
        memoization_enabled: config.memoization_enabled,
        cache_size_limit: config.cache_size_limit,
        parallel_enabled: config.parallel_enabled,
        parallel_threshold: config.parallel_threshold,
    }
}
109
/// Configuration information for monitoring and debugging
///
/// A plain-data snapshot produced by [`get_config_info`]; fields mirror the
/// same-named fields of the global `PerformanceConfig`.
#[derive(Debug, Clone)]
pub struct ConfigInfo {
    /// Whether SIMD code paths are enabled
    pub simd_enabled: bool,
    /// Minimum element count for SIMD paths to be used
    pub simd_threshold: usize,
    /// Whether result memoization is enabled
    pub memoization_enabled: bool,
    /// Maximum number of cached entries
    pub cache_size_limit: usize,
    /// Whether parallel (rayon) code paths are enabled
    pub parallel_enabled: bool,
    /// Minimum element count for parallel paths to be used
    pub parallel_threshold: usize,
}
120
/// Comprehensive cache statistics for performance monitoring
///
/// Produced by [`cache_stats`]; the `Default` value describes an empty cache
/// at the compile-time capacity limit.
#[derive(Debug, Clone)]
pub struct CacheStatistics {
    /// Current number of cached expressions
    pub current_size: usize,
    /// Maximum cache capacity
    pub max_size: usize,
    /// Estimated memory usage in bytes
    /// (heuristic: `cache_stats` assumes ~512 bytes per expression)
    pub memory_estimate_bytes: usize,
    /// Cache utilization as percentage (0.0 - 100.0)
    pub utilization_percent: f64,
    /// Whether the cache is at maximum capacity
    pub is_full: bool,
}
135
136impl Default for CacheStatistics {
137    fn default() -> Self {
138        Self {
139            current_size: 0,
140            max_size: CACHE_SIZE_LIMIT,
141            memory_estimate_bytes: 0,
142            utilization_percent: 0.0,
143            is_full: false,
144        }
145    }
146}
147
/// Comprehensive performance monitoring data
///
/// Assembled by [`get_performance_metrics`]. Note: `simd_stats` and
/// `parallel_stats` are currently always default (zeroed) values because
/// runtime tracking for them is not yet implemented.
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    /// Current configuration
    pub config: ConfigInfo,
    /// Cache statistics
    pub cache: CacheStatistics,
    /// SIMD usage statistics
    pub simd_stats: SimdStatistics,
    /// Parallel processing statistics
    pub parallel_stats: ParallelStatistics,
}
160
/// SIMD operation statistics
///
/// All counters start at zero; the derived `Default` is identical to the
/// previous hand-written `impl Default` (all fields zero), so the manual
/// impl has been removed in favor of the derive.
#[derive(Debug, Clone, Default)]
pub struct SimdStatistics {
    /// Total SIMD operations performed
    pub operations_count: u64,
    /// Total elements processed via SIMD
    pub elements_processed: u64,
    /// Average elements per SIMD operation
    pub avg_elements_per_op: f64,
}
181
/// Parallel processing statistics
///
/// All counters start at zero; the derived `Default` is identical to the
/// previous hand-written `impl Default` (all fields zero), so the manual
/// impl has been removed in favor of the derive.
#[derive(Debug, Clone, Default)]
pub struct ParallelStatistics {
    /// Total parallel operations performed
    pub operations_count: u64,
    /// Total elements processed in parallel
    pub elements_processed: u64,
    /// Average elements per parallel operation
    pub avg_elements_per_op: f64,
}
202
/// Performance thresholds based on benchmarks
///
/// NOTE(review): these compile-time constants are separate from the
/// runtime-configurable `PerformanceConfig` fields of similar names —
/// `should_use_simd`/`meets_parallel_threshold` read these constants, while
/// the `parallel_*` functions read the global config. Confirm that this
/// split is intentional.
const SIMD_THRESHOLD: usize = 50; // 50+ elements benefit from SIMD
const CACHE_SIZE_LIMIT: usize = 10000; // 10K expressions = ~10MB cache

/// Parallelism threshold - exported for binding-specific crates to use
pub const PARALLEL_THRESHOLD: usize = 1000; // 1000+ elements benefit from parallelism
209
/// Global memoization cache for expensive operations
///
/// Keyed by an expression hash (see `compute_expr_hash`); bounded at
/// `CACHE_SIZE_LIMIT` entries by eviction in `cache_result`.
///
/// NOTE(review): as with `GLOBAL_CONFIG`, the `Arc` layer appears redundant
/// for a `'static` — nothing in this module clones the handle.
static GLOBAL_CACHE: OnceLock<Arc<RwLock<HashMap<u64, Expression>>>> = OnceLock::new();

/// Get reference to global cache (initializes if needed)
///
/// Lazily creates an empty map on first access.
fn get_global_cache() -> &'static Arc<RwLock<HashMap<u64, Expression>>> {
    GLOBAL_CACHE.get_or_init(|| Arc::new(RwLock::new(HashMap::new())))
}
217
218/// Get cached result for expression hash
219pub fn get_cached_result(expr_hash: u64) -> Option<Expression> {
220    let cache = get_global_cache();
221    cache.read().ok()?.get(&expr_hash).cloned()
222}
223
/// Cache a computation result
///
/// If the cache is at capacity, one entry is evicted before the insert.
///
/// NOTE(review): the original comment called this "LRU", but
/// `HashMap::keys().next()` yields an *arbitrary* key (HashMap iteration
/// order is unspecified), so eviction is effectively random, not
/// least-recently-used. A true LRU needs an order-tracking structure.
///
/// A poisoned write lock causes the insert to be silently skipped.
pub fn cache_result(expr_hash: u64, result: Expression) {
    let cache_arc = get_global_cache();
    if let Ok(mut cache) = cache_arc.write() {
        // Evict one arbitrary entry when full (not true LRU; see note above)
        if cache.len() >= CACHE_SIZE_LIMIT {
            if let Some(oldest_key) = cache.keys().next().copied() {
                cache.remove(&oldest_key);
            }
        }
        cache.insert(expr_hash, result);
    }
}
237
/// Smart SIMD decision: use SIMD only when beneficial
///
/// Compares against the compile-time `SIMD_THRESHOLD` constant, NOT the
/// runtime-configurable `PerformanceConfig::simd_threshold` — changes made
/// via `update_global_config` have no effect here.
/// TODO(review): confirm this divergence from the global config is intended.
#[inline(always)]
pub fn should_use_simd(operation_size: usize) -> bool {
    operation_size >= SIMD_THRESHOLD
}
243
/// Check if operation size meets parallel threshold (for binding-specific use)
///
/// Compares against the compile-time `PARALLEL_THRESHOLD` constant, NOT the
/// runtime-configurable `PerformanceConfig::parallel_threshold` used by the
/// `parallel_*` functions in this module.
#[inline(always)]
pub fn meets_parallel_threshold(operation_size: usize) -> bool {
    operation_size >= PARALLEL_THRESHOLD
}
249
250/// SIMD-optimized bulk numeric addition
251pub fn simd_bulk_add_numeric(values: &[f64]) -> f64 {
252    if should_use_simd(values.len()) {
253        SimdOptimized::bulk_add_numeric(values)
254    } else {
255        values.iter().sum()
256    }
257}
258
/// SIMD-optimized bulk numeric multiplication
///
/// Returns the product of all values (1.0 for an empty slice, matching
/// `Iterator::product`).
///
/// NOTE: the previous "SIMD" path multiplied each 4-element chunk by a
/// freshly allocated vector of ones and then reduced each chunk
/// sequentially — strictly more work (two heap allocations per chunk plus a
/// no-op multiply) than a plain sequential product, for the same result.
/// Until a real SIMD reduction (e.g. a pairwise/tree product) is available,
/// the straight iterator product is both the simplest and the fastest
/// correct implementation for all input sizes.
pub fn simd_bulk_multiply_numeric(values: &[f64]) -> f64 {
    values.iter().product()
}
277
278/// Extract numeric values from expressions for SIMD processing
279pub fn extract_numeric_f64(expressions: &[Expression]) -> (Vec<f64>, Vec<Expression>) {
280    let mut numerics = Vec::new();
281    let mut non_numerics = Vec::new();
282
283    for expr in expressions {
284        match expr {
285            Expression::Number(crate::core::Number::Integer(i)) => {
286                if let Some(f) = i.to_f64() {
287                    numerics.push(f);
288                } else {
289                    non_numerics.push(expr.clone());
290                }
291            }
292            Expression::Number(crate::core::Number::Float(f)) => {
293                numerics.push(*f);
294            }
295            Expression::Number(crate::core::Number::Rational(r)) => {
296                if let Some(f) = r.to_f64() {
297                    numerics.push(f);
298                } else {
299                    non_numerics.push(expr.clone());
300                }
301            }
302            _ => non_numerics.push(expr.clone()),
303        }
304    }
305
306    (numerics, non_numerics)
307}
308
/// Compute hash for expression (for memoization)
///
/// WARNING(review): this hashes only `std::mem::discriminant(expr)` — the
/// enum variant tag — so EVERY expression of the same variant produces the
/// same hash. Used as a memoization key with `get_cached_result` /
/// `cache_result`, this would return a cached result computed for a
/// *different* expression of the same variant. Do not rely on this for
/// correctness until it hashes the full expression structure (requires
/// `Expression: Hash` or a manual structural traversal).
pub fn compute_expr_hash(expr: &Expression) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut hasher = DefaultHasher::new();
    // Only the variant discriminant is hashed — see warning above.
    std::mem::discriminant(expr).hash(&mut hasher);
    hasher.finish()
}
319
320/// Get comprehensive cache statistics for monitoring
321pub fn cache_stats() -> CacheStatistics {
322    let cache_arc = get_global_cache();
323    if let Ok(cache) = cache_arc.read() {
324        let current_size = cache.len();
325        let memory_estimate = current_size * 512; // Rough estimate: 512 bytes per expression
326        let utilization = (current_size as f64 / CACHE_SIZE_LIMIT as f64) * 100.0;
327
328        CacheStatistics {
329            current_size,
330            max_size: CACHE_SIZE_LIMIT,
331            memory_estimate_bytes: memory_estimate,
332            utilization_percent: utilization,
333            is_full: current_size >= CACHE_SIZE_LIMIT,
334        }
335    } else {
336        CacheStatistics::default()
337    }
338}
339
/// Get comprehensive performance metrics for monitoring and debugging
///
/// Note: SIMD and parallel statistics tracking is not yet implemented.
/// These fields return default values until runtime tracking is added.
pub fn get_performance_metrics() -> PerformanceMetrics {
    PerformanceMetrics {
        config: get_config_info(),
        cache: cache_stats(),
        // Placeholders: always zeroed until runtime tracking exists.
        simd_stats: SimdStatistics::default(),
        parallel_stats: ParallelStatistics::default(),
    }
}
352
353/// Get performance summary as human-readable string
354pub fn get_performance_summary() -> String {
355    let metrics = get_performance_metrics();
356
357    format!(
358        "MathHook Performance Summary:\n\
359         ├─ Configuration:\n\
360         │  ├─ SIMD: {} (threshold: {})\n\
361         │  ├─ Parallelism: {} (threshold: {})\n\
362         │  └─ Memoization: {} (limit: {})\n\
363         ├─ Cache Statistics:\n\
364         │  ├─ Size: {}/{} ({:.1}%)\n\
365         │  ├─ Memory: {:.2} KB\n\
366         │  └─ Status: {}\n\
367         └─ Optimization Status: {}",
368        if metrics.config.simd_enabled {
369            "Enabled"
370        } else {
371            "Disabled"
372        },
373        metrics.config.simd_threshold,
374        if metrics.config.parallel_enabled {
375            "Enabled"
376        } else {
377            "Disabled"
378        },
379        metrics.config.parallel_threshold,
380        if metrics.config.memoization_enabled {
381            "Enabled"
382        } else {
383            "Disabled"
384        },
385        metrics.config.cache_size_limit,
386        metrics.cache.current_size,
387        metrics.cache.max_size,
388        metrics.cache.utilization_percent,
389        metrics.cache.memory_estimate_bytes as f64 / 1024.0,
390        if metrics.cache.is_full {
391            "Full"
392        } else {
393            "Available"
394        },
395        if metrics.config.simd_enabled && metrics.config.memoization_enabled {
396            "Fully Optimized"
397        } else {
398            "Partially Optimized"
399        }
400    )
401}
402
403/// Clear the global cache
404pub fn clear_cache() {
405    let cache_arc = get_global_cache();
406    if let Ok(mut cache) = cache_arc.write() {
407        cache.clear();
408    }
409}
410
/// Parallel bulk expression simplification
///
/// NOTE(review): no simplification is currently performed — both branches
/// return unchanged clones of the input (`expr.clone()` / `to_vec()`). This
/// appears to be a placeholder until a real simplify pass is wired in;
/// confirm intent before relying on the name.
pub fn parallel_bulk_simplify(expressions: &[Expression]) -> Vec<Expression> {
    let config = get_global_config();

    if config.parallel_enabled && expressions.len() >= config.parallel_threshold {
        // Parallel path — currently just a parallel clone of each element
        expressions.par_iter().map(|expr| expr.clone()).collect()
    } else {
        // Sequential path — plain copy of the slice
        expressions.to_vec()
    }
}
423
424/// Parallel matrix element processing
425pub fn parallel_matrix_process<F, T>(matrix_rows: &[Vec<Expression>], processor: F) -> Vec<Vec<T>>
426where
427    F: Fn(&Expression) -> T + Sync + Send,
428    T: Send,
429{
430    let config = get_global_config();
431    let total_elements: usize = matrix_rows.iter().map(|row| row.len()).sum();
432
433    if config.parallel_enabled && total_elements >= config.parallel_threshold {
434        // Use parallel processing for large matrices
435        matrix_rows
436            .par_iter()
437            .map(|row| row.par_iter().map(&processor).collect())
438            .collect()
439    } else {
440        // Sequential processing for small matrices or when parallel disabled
441        matrix_rows
442            .iter()
443            .map(|row| row.iter().map(&processor).collect())
444            .collect()
445    }
446}
447
448/// Parallel numeric operations with SIMD + Rayon combination
449pub fn parallel_simd_bulk_add(values: &[f64]) -> f64 {
450    let config = get_global_config();
451
452    if config.parallel_enabled && values.len() >= config.parallel_threshold {
453        // Use parallel chunks with SIMD for very large arrays
454        let chunk_size = config.parallel_threshold / 4; // Optimal chunk size
455        values
456            .par_chunks(chunk_size)
457            .map(simd_bulk_add_numeric)
458            .sum()
459    } else {
460        // Use SIMD only for smaller arrays
461        simd_bulk_add_numeric(values)
462    }
463}
464
465/// Parallel numeric multiplication with SIMD + Rayon combination
466pub fn parallel_simd_bulk_multiply(values: &[f64]) -> f64 {
467    let config = get_global_config();
468
469    if config.parallel_enabled && values.len() >= config.parallel_threshold {
470        // Use parallel chunks with SIMD for very large arrays
471        let chunk_size = config.parallel_threshold / 4; // Optimal chunk size
472        values
473            .par_chunks(chunk_size)
474            .map(simd_bulk_multiply_numeric)
475            .product()
476    } else {
477        // Use SIMD only for smaller arrays
478        simd_bulk_multiply_numeric(values)
479    }
480}
481
482#[cfg(test)]
483mod tests {
484    use super::*;
485
    #[test]
    fn test_global_config_initialization() {
        // The default config is expected to enable SIMD and memoization.
        // NOTE(review): tests in this module mutate shared process-global
        // config and `cargo test` runs tests on multiple threads, so they
        // can race with each other — serialize them if they flake.
        let config = get_global_config();
        assert!(config.simd_enabled);
        assert!(config.memoization_enabled);
    }
492
    #[test]
    fn test_binding_config_setting() {
        // The Python binding profile is expected to turn parallelism off and
        // raise the threshold to usize::MAX so no input ever qualifies.
        set_binding_config(BindingContext::Python);
        let config = get_global_config();
        assert!(!config.parallel_enabled); // Python should disable parallelism
        assert_eq!(config.parallel_threshold, usize::MAX);
    }
500
    #[test]
    fn test_config_update() {
        // NOTE(review): mutates the process-global config and restores it at
        // the end; can still race with the other tests in this module under
        // the default multi-threaded test runner.
        let original_threshold = get_global_config().simd_threshold;

        update_global_config(|config| {
            config.simd_threshold = 100;
        });

        let updated_config = get_global_config();
        assert_eq!(updated_config.simd_threshold, 100);

        // Reset for other tests
        update_global_config(|config| {
            config.simd_threshold = original_threshold;
        });
    }
517}