anomaly_grid/
performance.rs

1//! Performance optimization utilities for Anomaly Grid
2//!
3//! This module provides practical performance improvements focused on
4//! memory efficiency and computational optimization for anomaly detection.
5
6use crate::context_tree::ContextTree;
7use crate::error::AnomalyGridResult;
8use std::collections::HashMap;
9use std::time::Instant;
10
/// Lightweight record of timing and size measurements used for monitoring.
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    /// Time spent training, expressed in milliseconds.
    pub training_time_ms: u64,
    /// Time spent on detection, expressed in milliseconds.
    pub detection_time_ms: u64,
    /// How many contexts were created.
    pub context_count: usize,
    /// Approximate memory footprint, in bytes.
    pub estimated_memory_bytes: usize,
}
23
24impl PerformanceMetrics {
25    /// Create new performance metrics
26    pub fn new() -> Self {
27        Self {
28            training_time_ms: 0,
29            detection_time_ms: 0,
30            context_count: 0,
31            estimated_memory_bytes: 0,
32        }
33    }
34
35    /// Calculate training throughput (elements per second)
36    pub fn training_throughput(&self, sequence_length: usize) -> f64 {
37        if self.training_time_ms == 0 {
38            return 0.0;
39        }
40        (sequence_length as f64) / (self.training_time_ms as f64 / 1000.0)
41    }
42
43    /// Calculate detection throughput (elements per second)
44    pub fn detection_throughput(&self, sequence_length: usize) -> f64 {
45        if self.detection_time_ms == 0 {
46            return 0.0;
47        }
48        (sequence_length as f64) / (self.detection_time_ms as f64 / 1000.0)
49    }
50}
51
52impl Default for PerformanceMetrics {
53    fn default() -> Self {
54        Self::new()
55    }
56}
57
58/// Context pruning for memory optimization
59impl ContextTree {
60    /// Remove contexts with low frequency counts
61    ///
62    /// This removes contexts that have been observed fewer than `min_count` times,
63    /// which can significantly reduce memory usage for large alphabets.
64    /// 
65    /// Note: Currently disabled for trie-based storage - will be reimplemented
66    pub fn prune_low_frequency_contexts(&mut self, _min_count: usize) -> usize {
67        // TODO: Implement pruning for trie-based storage
68        0
69    }
70
71    /// Remove contexts with low entropy (deterministic contexts)
72    ///
73    /// This removes contexts where the entropy is below the threshold,
74    /// indicating highly predictable transitions.
75    /// 
76    /// Note: Currently disabled for trie-based storage - will be reimplemented
77    pub fn prune_low_entropy_contexts(&mut self, _min_entropy: f64) -> usize {
78        // TODO: Implement entropy-based pruning for trie-based storage
79        0
80    }
81
82    /// Keep only the most frequent contexts up to a maximum count
83    ///
84    /// This is useful for memory-constrained environments where you want to keep
85    /// only the most important contexts.
86    /// 
87    /// Note: Currently disabled for trie-based storage - will be reimplemented
88    pub fn limit_context_count(&mut self, _max_contexts: usize) -> usize {
89        // TODO: Implement context limiting for trie-based storage
90        0
91    }
92
93    /// Estimate memory usage of the context tree
94    ///
95    /// This provides an estimate of the total memory used by the context tree,
96    /// including the trie structure, context nodes, and transition counts.
97    pub fn estimate_memory_usage(&self) -> usize {
98        self.trie().memory_usage()
99    }
100
101    /// Get context statistics for analysis
102    ///
103    /// Returns detailed statistics about the context tree structure,
104    /// including distribution by order and memory usage patterns.
105    pub fn get_context_statistics(&self) -> ContextStatistics {
106        let mut stats = ContextStatistics::new();
107        stats.total_contexts = self.trie().context_count();
108
109        for (state_ids, node) in self.trie().iter_contexts() {
110            let order = state_ids.len();
111            *stats.contexts_by_order.entry(order).or_insert(0) += 1;
112            stats.total_transitions += node.total_transitions();
113        }
114
115        if stats.total_contexts > 0 {
116            stats.avg_frequency = stats.total_transitions as f64 / stats.total_contexts as f64;
117        }
118
119        stats
120    }
121}
122
/// Aggregate figures describing the shape and usage of a context tree.
#[derive(Debug, Clone)]
pub struct ContextStatistics {
    /// Count of all contexts.
    pub total_contexts: usize,
    /// Count of transitions summed over every context.
    pub total_transitions: usize,
    /// Entropy summed over every context.
    pub total_entropy: f64,
    /// Mean entropy per context.
    pub avg_entropy: f64,
    /// Mean frequency per context.
    pub avg_frequency: f64,
    /// Smallest frequency that was observed.
    pub min_frequency: usize,
    /// Largest frequency that was observed.
    pub max_frequency: usize,
    /// Smallest entropy that was observed.
    pub min_entropy: f64,
    /// Largest entropy that was observed.
    pub max_entropy: f64,
    /// Context count, keyed by context order.
    pub contexts_by_order: HashMap<usize, usize>,
    /// Unique-transition count, keyed by context order.
    pub transitions_by_context: HashMap<usize, usize>,
}
149
150impl ContextStatistics {
151    /// Create new context statistics
152    pub fn new() -> Self {
153        Self {
154            total_contexts: 0,
155            total_transitions: 0,
156            total_entropy: 0.0,
157            avg_entropy: 0.0,
158            avg_frequency: 0.0,
159            min_frequency: usize::MAX,
160            max_frequency: 0,
161            min_entropy: f64::INFINITY,
162            max_entropy: 0.0,
163            contexts_by_order: HashMap::new(),
164            transitions_by_context: HashMap::new(),
165        }
166    }
167
168    /// Get memory efficiency (contexts per MB)
169    pub fn memory_efficiency(&self, memory_bytes: usize) -> f64 {
170        if memory_bytes == 0 {
171            return 0.0;
172        }
173        (self.total_contexts as f64) / (memory_bytes as f64 / 1_048_576.0)
174    }
175
176    /// Get compression ratio (transitions per context)
177    pub fn compression_ratio(&self) -> f64 {
178        if self.total_contexts == 0 {
179            return 0.0;
180        }
181        self.total_transitions as f64 / self.total_contexts as f64
182    }
183}
184
185impl Default for ContextStatistics {
186    fn default() -> Self {
187        Self::new()
188    }
189}
190
/// Settings that control which performance optimizations are applied.
#[derive(Debug, Clone)]
pub struct OptimizationConfig {
    /// Whether context pruning is switched on.
    pub enable_pruning: bool,
    /// Minimum count threshold used for context pruning.
    pub min_context_count: usize,
    /// Minimum entropy threshold used for context pruning.
    pub min_entropy: f64,
    /// Upper bound on the number of contexts to keep; `None` means no bound.
    pub max_contexts: Option<usize>,
    /// Whether performance monitoring is switched on.
    pub enable_monitoring: bool,
}
205
206impl Default for OptimizationConfig {
207    fn default() -> Self {
208        Self {
209            enable_pruning: false,
210            min_context_count: 2,
211            min_entropy: 0.1,
212            max_contexts: None,
213            enable_monitoring: true,
214        }
215    }
216}
217
218impl OptimizationConfig {
219    /// Create configuration for memory-constrained environments
220    pub fn for_low_memory() -> Self {
221        Self {
222            enable_pruning: true,
223            min_context_count: 3,
224            min_entropy: 0.2,
225            max_contexts: Some(10_000),
226            enable_monitoring: true,
227        }
228    }
229
230    /// Create configuration for high-accuracy requirements
231    pub fn for_high_accuracy() -> Self {
232        Self {
233            enable_pruning: false,
234            min_context_count: 1,
235            min_entropy: 0.0,
236            max_contexts: None,
237            enable_monitoring: true,
238        }
239    }
240
241    /// Create configuration for balanced performance
242    pub fn balanced() -> Self {
243        Self {
244            enable_pruning: true,
245            min_context_count: 2,
246            min_entropy: 0.05,
247            max_contexts: Some(100_000),
248            enable_monitoring: true,
249        }
250    }
251}
252
253/// Apply performance optimizations to a context tree
254pub fn optimize_context_tree(
255    tree: &mut ContextTree,
256    config: &OptimizationConfig,
257) -> AnomalyGridResult<PerformanceMetrics> {
258    let start_time = Instant::now();
259    let _initial_count = tree.context_count();
260
261    if config.enable_pruning {
262        // Apply frequency-based pruning
263        if config.min_context_count > 1 {
264            tree.prune_low_frequency_contexts(config.min_context_count);
265        }
266
267        // Apply entropy-based pruning
268        if config.min_entropy > 0.0 {
269            tree.prune_low_entropy_contexts(config.min_entropy);
270        }
271
272        // Apply maximum context limit
273        if let Some(_max_contexts) = config.max_contexts {
274            // TODO: Implement context limiting for trie-based storage
275            tree.limit_context_count(_max_contexts);
276        }
277    }
278
279    let optimization_time = start_time.elapsed();
280
281    let mut metrics = PerformanceMetrics::new();
282    metrics.training_time_ms = optimization_time.as_millis() as u64;
283    metrics.context_count = tree.context_count();
284    metrics.estimated_memory_bytes = tree.estimate_memory_usage();
285
286    Ok(metrics)
287}
288
#[cfg(test)]
mod tests {
    use super::*;

    /// Convert string literals into the owned symbols used to build sequences.
    fn symbols(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| String::from(*item)).collect()
    }

    /// Create an order-2 tree and feed it every sequence, in order, using the
    /// default `AnomalyGridConfig`.
    fn tree_from(sequences: &[Vec<String>]) -> ContextTree {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");
        let build_config = crate::config::AnomalyGridConfig::default();
        for sequence in sequences {
            tree.build_from_sequence(sequence, &build_config)
                .expect("Failed to build");
        }
        tree
    }

    #[test]
    fn test_performance_metrics() {
        let metrics = PerformanceMetrics {
            training_time_ms: 100, // 0.1 second
            detection_time_ms: 50, // 0.05 seconds
            ..PerformanceMetrics::new()
        };

        // 1000 elements in 0.1 seconds is 10,000 elements per second.
        assert_eq!(metrics.training_throughput(1000), 10000.0);
        // 500 elements in 0.05 seconds is 10,000 elements per second.
        assert_eq!(metrics.detection_throughput(500), 10000.0);
    }

    #[test]
    fn test_context_pruning() {
        // Five "X" symbols followed by ten "A" symbols: the high-frequency pattern.
        let mut high_freq_sequence = vec!["X".to_string(); 5];
        high_freq_sequence.extend(vec!["A".to_string(); 10]);
        // A short sequence that is only seen once: the low-frequency pattern.
        let low_freq_sequence = symbols(&["Y", "B"]);

        let mut tree = tree_from(&[high_freq_sequence, low_freq_sequence]);

        let initial_count = tree.context_count();
        assert!(initial_count > 0);

        // Pruning is not implemented for trie storage, so asking to drop
        // contexts with frequency < 5 removes nothing and reports 0.
        let pruned = tree.prune_low_frequency_contexts(5);

        assert_eq!(pruned, 0);
        assert_eq!(tree.context_count(), initial_count);
    }

    #[test]
    fn test_memory_estimation() {
        let tree = tree_from(&[symbols(&["X", "A", "B"])]);

        let memory_usage = tree.estimate_memory_usage();
        assert!(memory_usage > 0);
    }

    #[test]
    fn test_context_statistics() {
        // Two three-symbol sequences, so contexts of more than one order exist.
        let tree = tree_from(&[symbols(&["X", "A", "B"]), symbols(&["Y", "Z", "C"])]);

        let stats = tree.get_context_statistics();

        assert!(stats.total_contexts > 0);
        // With max_order=2, we should have contexts of order 1 and 2
        for order in [1, 2] {
            assert!(stats.contexts_by_order.contains_key(&order));
        }
    }

    #[test]
    fn test_optimization_config() {
        let low_memory = OptimizationConfig::for_low_memory();
        assert!(low_memory.enable_pruning);
        assert!(low_memory.max_contexts.is_some());

        let high_accuracy = OptimizationConfig::for_high_accuracy();
        assert!(!high_accuracy.enable_pruning);
        assert!(high_accuracy.max_contexts.is_none());

        let balanced = OptimizationConfig::balanced();
        assert!(balanced.enable_pruning);
        assert!(balanced.max_contexts.is_some());
    }

    #[test]
    fn test_optimize_context_tree() {
        // Sequences of length 3 through 7 cycling over S0, S1, S2 so that a
        // variety of contexts is generated.
        let sequences: Vec<Vec<String>> = (1..=5)
            .map(|i| (0..i + 2).map(|j| format!("S{}", j % 3)).collect())
            .collect();
        let mut tree = tree_from(&sequences);

        let initial_count = tree.context_count();
        assert!(initial_count > 0);

        let config = OptimizationConfig {
            enable_pruning: true,
            min_context_count: 2,
            min_entropy: 0.0,
            max_contexts: Some(8),
            enable_monitoring: true,
        };

        let metrics = optimize_context_tree(&mut tree, &config).expect("Failed to optimize");

        // Pruning is not implemented for trie storage, so the context count
        // must be unchanged by the optimization pass.
        assert_eq!(tree.context_count(), initial_count);
        assert_eq!(metrics.context_count, tree.context_count());
        assert!(tree.context_count() > 0);
        assert!(metrics.estimated_memory_bytes > 0);
    }
}