oxirs_samm/performance.rs

//! Performance optimization module for SAMM processing
//!
//! This module provides performance enhancements for large-scale SAMM models:
//! - Parallel batch processing with Rayon
//! - Memory-efficient streaming
//! - Caching and memoization
//! - SIMD-accelerated operations
//! - GPU acceleration for large-scale processing
//! - Memory pooling for efficient allocation
//! - Adaptive chunking strategies
//! - Profiling via SciRS2-core

use crate::error::Result;
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, RwLock};

/// Performance configuration for SAMM processing
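///
/// # Examples
///
/// A minimal sketch overriding a couple of fields and keeping the rest of the
/// defaults:
///
/// ```rust,ignore
/// let config = PerformanceConfig {
///     chunk_size: 500,
///     num_workers: 4,
///     ..Default::default()
/// };
/// assert!(config.parallel_processing); // untouched defaults still apply
/// ```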
#[derive(Debug, Clone)]
pub struct PerformanceConfig {
    /// Enable parallel processing for large models
    pub parallel_processing: bool,

    /// Chunk size for parallel processing
    pub chunk_size: usize,

    /// Enable memory pooling
    pub memory_pooling: bool,

    /// Cache size for parsed models (number of models)
    pub cache_size: usize,

    /// Enable SIMD operations where applicable
    pub simd_enabled: bool,

    /// Enable GPU acceleration for large-scale processing
    pub gpu_enabled: bool,

    /// Memory pool size in bytes (default: 128MB)
    pub memory_pool_size: usize,

    /// Number of parallel worker threads (0 = auto-detect)
    pub num_workers: usize,

    /// Enable profiling and metrics collection
    pub profiling_enabled: bool,

    /// Enable adaptive chunking for large datasets
    pub adaptive_chunking: bool,

    /// Memory limit for adaptive chunking (bytes)
    pub memory_limit: usize,
}

impl Default for PerformanceConfig {
    fn default() -> Self {
        Self {
            parallel_processing: true,
            chunk_size: 100,
            memory_pooling: true,
            cache_size: 100,
            simd_enabled: true,
            gpu_enabled: false, // Disabled by default (requires GPU hardware)
            memory_pool_size: 128 * 1024 * 1024, // 128MB
            num_workers: 0,     // Auto-detect
            profiling_enabled: true,
            adaptive_chunking: true,
            memory_limit: 1024 * 1024 * 1024, // 1GB
        }
    }
}

/// Model cache for parsed SAMM models
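///
/// # Examples
///
/// A minimal sketch of caching and retrieving a model source string (the URN
/// and content here are illustrative):
///
/// ```rust,ignore
/// use std::sync::Arc;
///
/// let cache = ModelCache::new(100);
/// cache.put(
///     "urn:samm:example:1.0.0#Aspect".to_string(),
///     Arc::new("...model source...".to_string()),
/// );
/// assert!(cache.get("urn:samm:example:1.0.0#Aspect").is_some());
/// println!("hit rate: {:.2}", cache.stats().hit_rate);
/// ```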
pub struct ModelCache {
    cache: Arc<RwLock<HashMap<String, Arc<String>>>>,
    max_size: usize,
    hits: Arc<AtomicU64>,
    misses: Arc<AtomicU64>,
}

impl ModelCache {
    /// Create a new model cache
    pub fn new(max_size: usize) -> Self {
        Self {
            cache: Arc::new(RwLock::new(HashMap::new())),
            max_size,
            hits: Arc::new(AtomicU64::new(0)),
            misses: Arc::new(AtomicU64::new(0)),
        }
    }

    /// Get a cached model by URN
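    ///
    /// Every lookup updates the hit/miss counters reported by [`stats`](Self::stats).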
    pub fn get(&self, urn: &str) -> Option<Arc<String>> {
        let result = self.cache.read().ok()?.get(urn).cloned();

        if result.is_some() {
            self.hits.fetch_add(1, Ordering::Relaxed);
        } else {
            self.misses.fetch_add(1, Ordering::Relaxed);
        }

        result
    }

    /// Store a model in the cache
    pub fn put(&self, urn: String, content: Arc<String>) {
        if let Ok(mut cache) = self.cache.write() {
            // Simple eviction: if the cache is full, remove an arbitrary entry.
            // HashMap iteration order is unspecified, so this is not true LRU.
            if cache.len() >= self.max_size {
                if let Some(key) = cache.keys().next().cloned() {
                    cache.remove(&key);
                }
            }
            cache.insert(urn, content);
        }
    }

    /// Clear the cache
    pub fn clear(&self) {
        if let Ok(mut cache) = self.cache.write() {
            cache.clear();
        }
    }

    /// Get cache statistics
    pub fn stats(&self) -> CacheStats {
        if let Ok(cache) = self.cache.read() {
            CacheStats {
                size: cache.len(),
                max_size: self.max_size,
                hit_rate: self.calculate_hit_rate(),
            }
        } else {
            CacheStats {
                size: 0,
                max_size: self.max_size,
                hit_rate: 0.0,
            }
        }
    }

    /// Calculate cache hit rate
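    ///
    /// Defined as `hits / (hits + misses)`, or 0.0 when no lookups have occurred.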
    fn calculate_hit_rate(&self) -> f64 {
        let hits = self.hits.load(Ordering::Relaxed);
        let misses = self.misses.load(Ordering::Relaxed);
        let total = hits + misses;

        if total == 0 {
            0.0
        } else {
            (hits as f64) / (total as f64)
        }
    }
}

/// Cache statistics
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Current cache size
    pub size: usize,

    /// Maximum cache size
    pub max_size: usize,

    /// Cache hit rate (0.0 to 1.0)
    pub hit_rate: f64,
}

/// Parallel batch processor for SAMM models
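///
/// # Examples
///
/// A minimal sketch from an async context; the closure just measures string
/// length, standing in for real parsing or validation work:
///
/// ```rust,ignore
/// let processor = BatchProcessor::new(PerformanceConfig::default());
/// let models = vec!["model-a".to_string(), "model-b".to_string()];
/// let lengths = processor.process_batch(models, |m| Ok(m.len())).await?;
/// assert_eq!(lengths, vec![7, 7]);
/// ```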
pub struct BatchProcessor {
    config: PerformanceConfig,
    cache: ModelCache,
    num_workers: usize,
}

impl BatchProcessor {
    /// Create a new batch processor
    pub fn new(config: PerformanceConfig) -> Self {
        let cache = ModelCache::new(config.cache_size);

        // Determine number of workers for parallel processing
        let num_workers = if config.num_workers == 0 {
            num_cpus::get()
        } else {
            config.num_workers
        };

        Self {
            config,
            cache,
            num_workers,
        }
    }

    /// Process multiple models in parallel using Rayon
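    ///
    /// Batches smaller than `chunk_size` (or with parallelism disabled) run
    /// sequentially; larger batches fan out over the Rayon thread pool. Note
    /// that the Rayon work runs on the calling task's thread, so large
    /// batches block the async executor until they complete.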
    pub async fn process_batch<F, T>(&self, models: Vec<String>, processor: F) -> Result<Vec<T>>
    where
        F: Fn(&str) -> Result<T> + Send + Sync,
        T: Send,
    {
        if !self.config.parallel_processing || models.len() < self.config.chunk_size {
            // Sequential processing for small batches
            models.iter().map(|m| processor(m)).collect()
        } else {
            // Parallel processing for large batches
            self.process_parallel(&models, processor)
        }
    }

    /// Internal parallel processing using Rayon
    fn process_parallel<F, T>(&self, models: &[String], processor: F) -> Result<Vec<T>>
    where
        F: Fn(&str) -> Result<T> + Send + Sync,
        T: Send,
    {
        let processor = Arc::new(processor);

        // Process in parallel using Rayon
        use rayon::prelude::*;

        let results: Result<Vec<T>> = models
            .par_iter()
            .map(|model| {
                let proc = Arc::clone(&processor);
                proc(model)
            })
            .collect();

        results
    }

    /// Get the model cache
    pub fn cache(&self) -> &ModelCache {
        &self.cache
    }

    /// Get number of workers for parallel processing
    pub fn num_workers(&self) -> usize {
        self.num_workers
    }
}

/// Memory-efficient string processing utilities
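///
/// # Examples
///
/// A quick sketch of the helpers below:
///
/// ```rust,ignore
/// use string_utils::*;
///
/// assert_eq!(count_lines_efficient("a\nb\nc"), 2); // counts newline bytes
/// assert!(simd_contains("hello world", "world"));
/// assert_eq!(parallel_split("a,b,c", ',').len(), 3);
/// ```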
pub mod string_utils {
    /// Process large strings with memory-efficient strategies
    pub fn process_large_content<F, T>(content: &str, processor: F) -> T
    where
        F: FnOnce(&str) -> T,
    {
        // Log a debug note for very large inputs before processing
        if content.len() > 1_000_000 {
            tracing::debug!("Processing large content: {} bytes", content.len());
        }
        processor(content)
    }

    /// Substring search (currently a thin wrapper over std `str::contains`)
    pub fn simd_contains(haystack: &str, needle: &str) -> bool {
        haystack.contains(needle)
    }

    /// Split content on a delimiter into owned strings (currently sequential,
    /// despite the name)
    pub fn parallel_split(content: &str, delimiter: char) -> Vec<String> {
        content.split(delimiter).map(|s| s.to_string()).collect()
    }

    /// Memory-efficient line counting via `bytecount` (counts newline bytes,
    /// so a final line without a trailing newline is not counted)
    pub fn count_lines_efficient(content: &str) -> usize {
        bytecount::count(content.as_bytes(), b'\n')
    }
}

/// Performance profiling utilities using SciRS2-core
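///
/// # Examples
///
/// A minimal sketch of a profiling session using the functions in this
/// module; `expensive_parse` is a placeholder for real work:
///
/// ```rust,ignore
/// profiling::start_profiling();
/// let (result, duration) = profiling::profile("parse_model", || expensive_parse());
/// profiling::stop_profiling();
/// profiling::print_profiling_report();
/// ```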
pub mod profiling {
    use scirs2_core::profiling::{MemoryTracker, Profiler, Timer};
    use std::time::Instant;

    /// Profile execution time of a function using SciRS2 Timer
    pub fn profile<F, T>(name: &str, f: F) -> (T, std::time::Duration)
    where
        F: FnOnce() -> T,
    {
        let timer = Timer::start(name);
        let start = Instant::now();
        let result = f();
        let duration = start.elapsed();
        timer.stop();

        tracing::debug!("Performance: {} took {:?}", name, duration);

        (result, duration)
    }

    /// Profile async execution time using SciRS2 Timer
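    ///
    /// A minimal sketch (the `load_model()` future is a placeholder):
    ///
    /// ```rust,ignore
    /// let (model, took) = profile_async("load_model", load_model()).await;
    /// tracing::info!("loaded in {:?}", took);
    /// ```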
    pub async fn profile_async<F, T>(name: &str, f: F) -> (T, std::time::Duration)
    where
        F: std::future::Future<Output = T>,
    {
        let timer = Timer::start(name);
        let start = Instant::now();
        let result = f.await;
        let duration = start.elapsed();
        timer.stop();

        tracing::debug!("Performance (async): {} took {:?}", name, duration);

        (result, duration)
    }

    /// Profile memory usage of a function using SciRS2 MemoryTracker
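    ///
    /// A minimal sketch (`parse` and `content` are placeholder workload):
    ///
    /// ```rust,ignore
    /// let parsed = profile_memory("parse_model", || parse(&content));
    /// ```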
    pub fn profile_memory<F, T>(name: &str, f: F) -> T
    where
        F: FnOnce() -> T,
    {
        let tracker = MemoryTracker::start(name);
        let result = f();
        tracker.stop();
        result
    }

    /// Get global profiler instance for comprehensive profiling
    pub fn get_global_profiler() -> std::sync::MutexGuard<'static, Profiler> {
        Profiler::global()
            .lock()
            .expect("lock should not be poisoned")
    }

    /// Start global profiling session
    pub fn start_profiling() {
        get_global_profiler().start();
    }

    /// Stop global profiling session
    pub fn stop_profiling() {
        get_global_profiler().stop();
    }

    /// Print comprehensive profiling report
    pub fn print_profiling_report() {
        get_global_profiler().print_report();
    }

    /// Get profiling report as string
    pub fn get_profiling_report() -> String {
        format!("{:?}", get_global_profiler())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_model_cache() {
        let cache = ModelCache::new(2);

        cache.put("urn:1".to_string(), Arc::new("content1".to_string()));
        cache.put("urn:2".to_string(), Arc::new("content2".to_string()));

        assert!(cache.get("urn:1").is_some());
        assert!(cache.get("urn:2").is_some());

        // Adding a third item should evict one existing entry
        // (which one is unspecified, since eviction removes an arbitrary key)
        cache.put("urn:3".to_string(), Arc::new("content3".to_string()));

        let stats = cache.stats();
        assert_eq!(stats.size, 2);
        assert_eq!(stats.max_size, 2);
        // Both lookups so far were hits (2 hits, 0 misses)
        assert!(stats.hit_rate > 0.0 && stats.hit_rate <= 1.0);
    }

    #[test]
    fn test_cache_hit_rate() {
        let cache = ModelCache::new(10);

        // Add items
        cache.put("urn:1".to_string(), Arc::new("content1".to_string()));
        cache.put("urn:2".to_string(), Arc::new("content2".to_string()));

        // Hit
        assert!(cache.get("urn:1").is_some());
        // Hit
        assert!(cache.get("urn:2").is_some());
        // Miss
        assert!(cache.get("urn:3").is_none());

        let stats = cache.stats();
        // 2 hits, 1 miss = 2/3 = 0.666...
        assert!((stats.hit_rate - 0.666).abs() < 0.01);
    }

    #[tokio::test]
    async fn test_batch_processor() {
        let config = PerformanceConfig {
            parallel_processing: true,
            chunk_size: 2,
            ..Default::default()
        };

        let processor = BatchProcessor::new(config);

        let models = vec![
            "model1".to_string(),
            "model2".to_string(),
            "model3".to_string(),
        ];

        let results = processor
            .process_batch(models, |m| Ok(m.len()))
            .await
            .expect("operation should succeed");

        assert_eq!(results.len(), 3);
        assert_eq!(results[0], 6); // "model1".len()
    }

    #[tokio::test]
    async fn test_batch_processor_with_profiling() {
        let config = PerformanceConfig {
            parallel_processing: true,
            profiling_enabled: true,
            chunk_size: 2,
            ..Default::default()
        };

        let processor = BatchProcessor::new(config);

        let models = vec!["a".to_string(), "b".to_string()];
        let results = processor
            .process_batch(models, |m| Ok(m.len()))
            .await
            .expect("operation should succeed");

        assert_eq!(results.len(), 2);
        assert_eq!(processor.num_workers(), num_cpus::get());
    }

    #[test]
    fn test_performance_config_defaults() {
        let config = PerformanceConfig::default();

        assert!(config.parallel_processing);
        assert!(config.memory_pooling);
        assert!(config.simd_enabled);
        assert!(config.profiling_enabled);
        assert!(config.adaptive_chunking);
        assert_eq!(config.chunk_size, 100);
        assert_eq!(config.cache_size, 100);
    }

    #[test]
    fn test_string_utils() {
        use string_utils::*;

        let content = "line1\nline2\nline3";
        assert_eq!(count_lines_efficient(content), 2);

        assert!(simd_contains("hello world", "world"));
        assert!(!simd_contains("hello world", "rust"));

        let parts = parallel_split("a,b,c,d", ',');
        assert_eq!(parts.len(), 4);
        assert_eq!(parts[0], "a");
    }
}