// anomaly_grid/performance.rs

//! Performance optimization utilities for Anomaly Grid
//!
//! This module provides practical performance improvements focused on
//! memory efficiency and computational optimization for anomaly detection.
use crate::context_tree::ContextTree;
use crate::error::AnomalyGridResult;
use std::collections::HashMap;
use std::time::Instant;
/// Simple performance metrics for monitoring
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    /// Training time in milliseconds
    pub training_time_ms: u64,
    /// Detection time in milliseconds
    pub detection_time_ms: u64,
    /// Number of contexts created
    pub context_count: usize,
    /// Estimated memory usage in bytes
    pub estimated_memory_bytes: usize,
}

impl PerformanceMetrics {
    /// Create new performance metrics with all counters zeroed.
    pub fn new() -> Self {
        Self {
            training_time_ms: 0,
            detection_time_ms: 0,
            context_count: 0,
            estimated_memory_bytes: 0,
        }
    }

    /// Shared throughput calculation: elements per second for
    /// `sequence_length` elements processed in `time_ms` milliseconds.
    /// Returns 0.0 when no time has been recorded (avoids division by zero).
    fn throughput(sequence_length: usize, time_ms: u64) -> f64 {
        if time_ms == 0 {
            0.0
        } else {
            sequence_length as f64 / (time_ms as f64 / 1000.0)
        }
    }

    /// Calculate training throughput (elements per second)
    pub fn training_throughput(&self, sequence_length: usize) -> f64 {
        Self::throughput(sequence_length, self.training_time_ms)
    }

    /// Calculate detection throughput (elements per second)
    pub fn detection_throughput(&self, sequence_length: usize) -> f64 {
        Self::throughput(sequence_length, self.detection_time_ms)
    }
}

impl Default for PerformanceMetrics {
    fn default() -> Self {
        Self::new()
    }
}
58/// Context pruning for memory optimization
59impl ContextTree {
60    /// Remove low-frequency contexts to reduce memory usage
61    ///
62    /// This removes contexts that have been observed fewer than `min_count` times,
63    /// which can significantly reduce memory usage for large alphabets.
64    pub fn prune_low_frequency_contexts(&mut self, min_count: usize) -> usize {
65        let initial_count = self.contexts.len();
66
67        self.contexts.retain(|_, node| {
68            let total_transitions: usize = node.counts.values().sum();
69            total_transitions >= min_count
70        });
71
72        initial_count - self.contexts.len()
73    }
74
75    /// Remove contexts with low entropy (deterministic contexts)
76    ///
77    /// Contexts with very low entropy provide little information and can be pruned
78    /// to reduce memory usage while maintaining detection accuracy.
79    pub fn prune_low_entropy_contexts(&mut self, min_entropy: f64) -> usize {
80        let initial_count = self.contexts.len();
81
82        self.contexts.retain(|_, node| node.entropy >= min_entropy);
83
84        initial_count - self.contexts.len()
85    }
86
87    /// Keep only the most frequent contexts up to a maximum count
88    ///
89    /// This is useful for memory-constrained environments where you want to
90    /// keep only the most important contexts.
91    pub fn keep_top_contexts(&mut self, max_contexts: usize) -> usize {
92        if self.contexts.len() <= max_contexts {
93            return 0;
94        }
95
96        let initial_count = self.contexts.len();
97
98        // Sort contexts by total frequency
99        let mut context_frequencies: Vec<_> = self
100            .contexts
101            .iter()
102            .map(|(context, node)| {
103                let freq: usize = node.counts.values().sum();
104                (context.clone(), freq)
105            })
106            .collect();
107
108        context_frequencies.sort_by(|a, b| b.1.cmp(&a.1));
109
110        // Keep only top contexts
111        let contexts_to_keep: std::collections::HashSet<_> = context_frequencies
112            .into_iter()
113            .take(max_contexts)
114            .map(|(context, _)| context)
115            .collect();
116
117        self.contexts
118            .retain(|context, _| contexts_to_keep.contains(context));
119
120        initial_count - self.contexts.len()
121    }
122
123    /// Estimate memory usage of the context tree
124    pub fn estimate_memory_usage(&self) -> usize {
125        let mut total_bytes = 0;
126
127        for (context, node) in &self.contexts {
128            // Context vector size
129            total_bytes += context.len() * std::mem::size_of::<String>();
130            total_bytes += context.iter().map(|s| s.capacity()).sum::<usize>();
131
132            // Node counts HashMap
133            total_bytes +=
134                node.counts.len() * (std::mem::size_of::<String>() + std::mem::size_of::<usize>());
135            total_bytes += node.counts.keys().map(|s| s.capacity()).sum::<usize>();
136
137            // Node probabilities HashMap
138            total_bytes += node.probabilities.len()
139                * (std::mem::size_of::<String>() + std::mem::size_of::<f64>());
140            total_bytes += node
141                .probabilities
142                .keys()
143                .map(|s| s.capacity())
144                .sum::<usize>();
145
146            // Entropy and KL divergence
147            total_bytes += 2 * std::mem::size_of::<f64>();
148        }
149
150        total_bytes
151    }
152
153    /// Get context statistics for analysis
154    pub fn get_context_statistics(&self) -> ContextStatistics {
155        let mut stats = ContextStatistics::new();
156
157        for (context, node) in &self.contexts {
158            let total_count: usize = node.counts.values().sum();
159            let unique_transitions = node.counts.len();
160
161            stats.total_contexts += 1;
162            stats.total_transitions += total_count;
163            stats.total_entropy += node.entropy;
164
165            if total_count < stats.min_frequency {
166                stats.min_frequency = total_count;
167            }
168            if total_count > stats.max_frequency {
169                stats.max_frequency = total_count;
170            }
171
172            if node.entropy < stats.min_entropy {
173                stats.min_entropy = node.entropy;
174            }
175            if node.entropy > stats.max_entropy {
176                stats.max_entropy = node.entropy;
177            }
178
179            stats
180                .contexts_by_order
181                .entry(context.len())
182                .and_modify(|count| *count += 1)
183                .or_insert(1);
184
185            stats
186                .transitions_by_context
187                .entry(context.len())
188                .and_modify(|count| *count += unique_transitions)
189                .or_insert(unique_transitions);
190        }
191
192        if stats.total_contexts > 0 {
193            stats.avg_entropy = stats.total_entropy / stats.total_contexts as f64;
194            stats.avg_frequency = stats.total_transitions as f64 / stats.total_contexts as f64;
195        }
196
197        stats
198    }
199}
200
/// Statistics about context tree structure and usage
#[derive(Debug, Clone)]
pub struct ContextStatistics {
    /// Total number of contexts
    pub total_contexts: usize,
    /// Total number of transitions across all contexts
    pub total_transitions: usize,
    /// Sum of entropy across all contexts
    pub total_entropy: f64,
    /// Average entropy per context
    pub avg_entropy: f64,
    /// Average frequency per context
    pub avg_frequency: f64,
    /// Minimum frequency observed
    pub min_frequency: usize,
    /// Maximum frequency observed
    pub max_frequency: usize,
    /// Minimum entropy observed
    pub min_entropy: f64,
    /// Maximum entropy observed
    pub max_entropy: f64,
    /// Number of contexts by order
    pub contexts_by_order: HashMap<usize, usize>,
    /// Number of unique transitions by context order
    pub transitions_by_context: HashMap<usize, usize>,
}

impl ContextStatistics {
    /// Create new context statistics
    pub fn new() -> Self {
        Self::default()
    }

    /// Get memory efficiency (contexts per MB)
    ///
    /// Returns 0.0 for zero bytes to avoid division by zero.
    pub fn memory_efficiency(&self, memory_bytes: usize) -> f64 {
        match memory_bytes {
            0 => 0.0,
            bytes => self.total_contexts as f64 / (bytes as f64 / 1_048_576.0),
        }
    }

    /// Get compression ratio (transitions per context)
    ///
    /// Returns 0.0 when no contexts have been recorded.
    pub fn compression_ratio(&self) -> f64 {
        if self.total_contexts == 0 {
            0.0
        } else {
            self.total_transitions as f64 / self.total_contexts as f64
        }
    }
}

impl Default for ContextStatistics {
    fn default() -> Self {
        Self {
            total_contexts: 0,
            total_transitions: 0,
            total_entropy: 0.0,
            avg_entropy: 0.0,
            avg_frequency: 0.0,
            // Extremes start at their identity values so the first
            // observation always replaces them.
            min_frequency: usize::MAX,
            max_frequency: 0,
            min_entropy: f64::INFINITY,
            max_entropy: 0.0,
            contexts_by_order: HashMap::new(),
            transitions_by_context: HashMap::new(),
        }
    }
}
/// Performance optimization configuration
#[derive(Debug, Clone)]
pub struct OptimizationConfig {
    /// Enable context pruning
    pub enable_pruning: bool,
    /// Minimum count for context pruning
    pub min_context_count: usize,
    /// Minimum entropy for context pruning
    pub min_entropy: f64,
    /// Maximum number of contexts to keep
    pub max_contexts: Option<usize>,
    /// Enable performance monitoring
    pub enable_monitoring: bool,
}

impl Default for OptimizationConfig {
    /// Conservative defaults: pruning disabled, monitoring enabled.
    fn default() -> Self {
        Self {
            enable_pruning: false,
            min_context_count: 2,
            min_entropy: 0.1,
            max_contexts: None,
            enable_monitoring: true,
        }
    }
}

impl OptimizationConfig {
    /// Create configuration for memory-constrained environments:
    /// aggressive pruning plus a hard cap of 10,000 contexts.
    pub fn for_low_memory() -> Self {
        Self {
            enable_pruning: true,
            min_context_count: 3,
            min_entropy: 0.2,
            max_contexts: Some(10_000),
            ..Self::default()
        }
    }

    /// Create configuration for high-accuracy requirements:
    /// no pruning so every context is retained.
    pub fn for_high_accuracy() -> Self {
        Self {
            min_context_count: 1,
            min_entropy: 0.0,
            ..Self::default()
        }
    }

    /// Create configuration for balanced performance: mild pruning with a
    /// generous 100,000-context cap.
    pub fn balanced() -> Self {
        Self {
            enable_pruning: true,
            min_entropy: 0.05,
            max_contexts: Some(100_000),
            ..Self::default()
        }
    }
}
331/// Apply performance optimizations to a context tree
332pub fn optimize_context_tree(
333    tree: &mut ContextTree,
334    config: &OptimizationConfig,
335) -> AnomalyGridResult<PerformanceMetrics> {
336    let start_time = Instant::now();
337    let _initial_count = tree.context_count();
338
339    if config.enable_pruning {
340        // Apply frequency-based pruning
341        if config.min_context_count > 1 {
342            tree.prune_low_frequency_contexts(config.min_context_count);
343        }
344
345        // Apply entropy-based pruning
346        if config.min_entropy > 0.0 {
347            tree.prune_low_entropy_contexts(config.min_entropy);
348        }
349
350        // Apply maximum context limit
351        if let Some(max_contexts) = config.max_contexts {
352            tree.keep_top_contexts(max_contexts);
353        }
354    }
355
356    let optimization_time = start_time.elapsed();
357
358    let mut metrics = PerformanceMetrics::new();
359    metrics.training_time_ms = optimization_time.as_millis() as u64;
360    metrics.context_count = tree.context_count();
361    metrics.estimated_memory_bytes = tree.estimate_memory_usage();
362
363    Ok(metrics)
364}
365
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context_tree::ContextNode;

    #[test]
    fn test_performance_metrics() {
        let mut metrics = PerformanceMetrics::new();
        metrics.training_time_ms = 100; // 0.1 seconds
        metrics.detection_time_ms = 50; // 0.05 seconds

        // 1000 / 0.1 s and 500 / 0.05 s both work out to 10,000 elements/s.
        assert_eq!(metrics.training_throughput(1000), 10000.0);
        assert_eq!(metrics.detection_throughput(500), 10000.0);
    }

    #[test]
    fn test_context_pruning() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // One frequent context (10 observations)...
        let mut frequent = ContextNode::new();
        for _ in 0..10 {
            frequent.add_transition("A".to_string());
        }
        tree.contexts.insert(vec!["X".to_string()], frequent);

        // ...and one rare context (a single observation).
        let mut rare = ContextNode::new();
        rare.add_transition("B".to_string());
        tree.contexts.insert(vec!["Y".to_string()], rare);

        assert_eq!(tree.context_count(), 2);

        // A threshold of 5 should drop exactly the rare context.
        let pruned = tree.prune_low_frequency_contexts(5);
        assert_eq!(pruned, 1);
        assert_eq!(tree.context_count(), 1);
    }

    #[test]
    fn test_memory_estimation() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        let mut node = ContextNode::new();
        node.add_transition("A".to_string());
        node.add_transition("B".to_string());
        tree.contexts.insert(vec!["X".to_string()], node);

        // A non-empty tree must report a positive memory estimate.
        assert!(tree.estimate_memory_usage() > 0);
    }

    #[test]
    fn test_context_statistics() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // An order-1 context with two transitions.
        let mut order1 = ContextNode::new();
        order1.add_transition("A".to_string());
        order1.add_transition("B".to_string());
        tree.contexts.insert(vec!["X".to_string()], order1);

        // An order-2 context with one transition.
        let mut order2 = ContextNode::new();
        order2.add_transition("C".to_string());
        tree.contexts
            .insert(vec!["Y".to_string(), "Z".to_string()], order2);

        let stats = tree.get_context_statistics();

        assert_eq!(stats.total_contexts, 2);
        assert_eq!(stats.contexts_by_order.get(&1), Some(&1)); // one order-1 context
        assert_eq!(stats.contexts_by_order.get(&2), Some(&1)); // one order-2 context
    }

    #[test]
    fn test_optimization_config() {
        let low_memory = OptimizationConfig::for_low_memory();
        assert!(low_memory.enable_pruning);
        assert!(low_memory.max_contexts.is_some());

        let high_accuracy = OptimizationConfig::for_high_accuracy();
        assert!(!high_accuracy.enable_pruning);
        assert!(high_accuracy.max_contexts.is_none());

        let balanced = OptimizationConfig::balanced();
        assert!(balanced.enable_pruning);
        assert!(balanced.max_contexts.is_some());
    }

    #[test]
    fn test_optimize_context_tree() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // Contexts "X1".."X10" with 1..=10 transitions respectively, covering
        // a spread of frequencies (starting from 1 so each has transitions).
        for i in 1..=10 {
            let mut node = ContextNode::new();
            for _ in 0..i {
                node.add_transition("A".to_string());
            }
            // Probabilities must be computed so entropy is populated.
            let config = crate::config::AnomalyGridConfig::default();
            node.calculate_probabilities(&config);
            tree.contexts.insert(vec![format!("X{}", i)], node);
        }

        let initial_count = tree.context_count();

        // Lenient settings: drop frequency < 2, no entropy filter, keep top 8.
        let config = OptimizationConfig {
            enable_pruning: true,
            min_context_count: 2,
            min_entropy: 0.0,
            max_contexts: Some(8),
            enable_monitoring: true,
        };

        let metrics = optimize_context_tree(&mut tree, &config).expect("Failed to optimize");

        assert!(tree.context_count() <= initial_count);
        assert_eq!(metrics.context_count, tree.context_count());
        // Some contexts must survive the lenient settings.
        assert!(tree.context_count() > 0);
        // estimated_memory_bytes is a usize (always >= 0); sanity-check range.
        assert!(metrics.estimated_memory_bytes < usize::MAX);
    }
}