anomaly_grid/performance.rs

//! Performance optimization utilities for Anomaly Grid
//!
//! This module provides practical performance improvements focused on
//! memory efficiency and computational optimization for anomaly detection.
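//!
//! # Examples
//!
//! A minimal end-to-end sketch (ignored doctest): the crate paths below are
//! assumptions inferred from this module's own imports, and training is
//! elided.
//!
//! ```ignore
//! use anomaly_grid::context_tree::ContextTree;
//! use anomaly_grid::performance::{optimize_context_tree, OptimizationConfig};
//!
//! let mut tree = ContextTree::new(3).expect("failed to create tree");
//! // ... train the tree with `build_from_sequence` ...
//! let metrics = optimize_context_tree(&mut tree, &OptimizationConfig::balanced())
//!     .expect("optimization failed");
//! println!(
//!     "{} contexts, ~{} bytes",
//!     metrics.context_count, metrics.estimated_memory_bytes
//! );
//! ```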

use crate::context_tree::ContextTree;
use crate::error::AnomalyGridResult;
use crate::string_interner::StateId;
use std::collections::{HashMap, HashSet};
use std::time::Instant;

/// Simple performance metrics for monitoring
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    /// Training time in milliseconds
    pub training_time_ms: u64,
    /// Detection time in milliseconds
    pub detection_time_ms: u64,
    /// Number of contexts created
    pub context_count: usize,
    /// Estimated memory usage in bytes
    pub estimated_memory_bytes: usize,
}

impl PerformanceMetrics {
    /// Create new performance metrics
    pub fn new() -> Self {
        Self {
            training_time_ms: 0,
            detection_time_ms: 0,
            context_count: 0,
            estimated_memory_bytes: 0,
        }
    }

    /// Calculate training throughput (elements per second)
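    ///
    /// # Examples
    ///
    /// A minimal sketch (ignored doctest): timings are filled in by hand, and
    /// the crate path is an assumption.
    ///
    /// ```ignore
    /// use anomaly_grid::performance::PerformanceMetrics;
    ///
    /// let mut metrics = PerformanceMetrics::new();
    /// metrics.training_time_ms = 200; // 0.2 seconds
    ///
    /// // 2_000 elements / 0.2 s = 10_000 elements per second
    /// assert_eq!(metrics.training_throughput(2_000), 10_000.0);
    /// ```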
    pub fn training_throughput(&self, sequence_length: usize) -> f64 {
        if self.training_time_ms == 0 {
            return 0.0;
        }
        (sequence_length as f64) / (self.training_time_ms as f64 / 1000.0)
    }

    /// Calculate detection throughput (elements per second)
    pub fn detection_throughput(&self, sequence_length: usize) -> f64 {
        if self.detection_time_ms == 0 {
            return 0.0;
        }
        (sequence_length as f64) / (self.detection_time_ms as f64 / 1000.0)
    }
}

impl Default for PerformanceMetrics {
    fn default() -> Self {
        Self::new()
    }
}

/// Context pruning for memory optimization
impl ContextTree {
    /// Remove contexts with low frequency counts
    ///
    /// This removes contexts that have been observed fewer than `min_count` times,
    /// which can significantly reduce memory usage for large alphabets.
    ///
    /// Returns the number of contexts removed.
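    ///
    /// # Examples
    ///
    /// A minimal sketch (ignored doctest); assumes `tree` has already been
    /// trained with `build_from_sequence`.
    ///
    /// ```ignore
    /// // Drop every context observed fewer than 3 times.
    /// let removed = tree.prune_low_frequency_contexts(3);
    /// println!("removed {removed}, {} contexts remain", tree.context_count());
    /// ```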
    pub fn prune_low_frequency_contexts(&mut self, min_count: usize) -> usize {
        if min_count == 0 {
            return 0;
        }

        self.rebuild_filtered(|_, node| node.total_count() >= min_count)
    }

    /// Remove contexts with low entropy (deterministic contexts)
    ///
    /// This removes contexts where the entropy is below the threshold,
    /// indicating highly predictable transitions.
    ///
    /// Entropy is computed using the last configuration used during training.
    ///
    /// Returns the number of contexts removed.
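    ///
    /// # Examples
    ///
    /// A minimal sketch (ignored doctest); the threshold of 0.05 is an
    /// arbitrary illustration, not a recommendation.
    ///
    /// ```ignore
    /// // Remove near-deterministic contexts (entropy below 0.05).
    /// let removed = tree.prune_low_entropy_contexts(0.05);
    /// ```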
    pub fn prune_low_entropy_contexts(&mut self, min_entropy: f64) -> usize {
        if min_entropy <= 0.0 {
            return 0;
        }

        let cfg = self.last_config.clone();
        self.rebuild_filtered(|_, node| node.compute_entropy(&cfg) >= min_entropy)
    }

    /// Keep only the most frequent contexts up to a maximum count
    ///
    /// This is useful for memory-constrained environments where you want to keep
    /// only the most important contexts.
    ///
    /// Returns the number of contexts removed.
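    ///
    /// # Examples
    ///
    /// A minimal sketch (ignored doctest); the cap of 50_000 contexts is an
    /// illustrative number.
    ///
    /// ```ignore
    /// // Keep only the 50_000 most frequent contexts.
    /// let removed = tree.limit_context_count(50_000);
    /// assert!(tree.context_count() <= 50_000);
    /// ```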
    pub fn limit_context_count(&mut self, max_contexts: usize) -> usize {
        if max_contexts == 0 {
            return 0;
        }

        let original_count = self.trie().context_count();
        if original_count <= max_contexts {
            return 0;
        }

        // Collect contexts with their frequencies
        let mut contexts: Vec<(Vec<StateId>, usize)> = self
            .trie()
            .iter_contexts()
            .map(|(state_ids, node)| (state_ids, node.total_count()))
            .collect();

        // Keep the most frequent contexts
        contexts.sort_by(|a, b| b.1.cmp(&a.1));
        let keep: HashSet<Vec<StateId>> = contexts
            .into_iter()
            .take(max_contexts)
            .map(|(ids, _)| ids)
            .collect();

        self.rebuild_filtered(|state_ids, _| keep.contains(state_ids))
    }

    /// Estimate memory usage of the context tree
    ///
    /// This provides an estimate of the total memory used by the context tree,
    /// including the trie structure, context nodes, and transition counts.
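    ///
    /// # Examples
    ///
    /// A minimal sketch (ignored doctest); assumes a trained `tree`.
    ///
    /// ```ignore
    /// let bytes = tree.estimate_memory_usage();
    /// println!("context tree uses roughly {} KiB", bytes / 1024);
    /// ```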
    pub fn estimate_memory_usage(&self) -> usize {
        self.trie().memory_usage()
    }

    /// Get context statistics for analysis
    ///
    /// Returns detailed statistics about the context tree structure,
    /// including distribution by order and memory usage patterns.
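    ///
    /// # Examples
    ///
    /// A minimal sketch (ignored doctest); assumes a trained `tree`.
    ///
    /// ```ignore
    /// let stats = tree.get_context_statistics();
    /// for (order, count) in &stats.contexts_by_order {
    ///     println!("order {order}: {count} contexts");
    /// }
    /// println!("avg transitions per context: {:.2}", stats.avg_frequency);
    /// ```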
    pub fn get_context_statistics(&self) -> ContextStatistics {
        let mut stats = ContextStatistics::new();
        stats.total_contexts = self.trie().context_count();
        let cfg = self.last_config.clone();

        for (state_ids, node) in self.trie().iter_contexts() {
            let order = state_ids.len();
            *stats.contexts_by_order.entry(order).or_insert(0) += 1;
            stats.total_transitions += node.total_transitions();

            // Track frequency extremes across contexts
            let count = node.total_count();
            stats.min_frequency = stats.min_frequency.min(count);
            stats.max_frequency = stats.max_frequency.max(count);

            // Track entropy totals and extremes using the last training config
            let entropy = node.compute_entropy(&cfg);
            stats.total_entropy += entropy;
            stats.min_entropy = stats.min_entropy.min(entropy);
            stats.max_entropy = stats.max_entropy.max(entropy);
        }

        if stats.total_contexts > 0 {
            stats.avg_frequency = stats.total_transitions as f64 / stats.total_contexts as f64;
            stats.avg_entropy = stats.total_entropy / stats.total_contexts as f64;
        }

        stats
    }
}

/// Statistics about context tree structure and usage
#[derive(Debug, Clone)]
pub struct ContextStatistics {
    /// Total number of contexts
    pub total_contexts: usize,
    /// Total number of transitions across all contexts
    pub total_transitions: usize,
    /// Sum of entropy across all contexts
    pub total_entropy: f64,
    /// Average entropy per context
    pub avg_entropy: f64,
    /// Average frequency per context
    pub avg_frequency: f64,
    /// Minimum frequency observed
    pub min_frequency: usize,
    /// Maximum frequency observed
    pub max_frequency: usize,
    /// Minimum entropy observed
    pub min_entropy: f64,
    /// Maximum entropy observed
    pub max_entropy: f64,
    /// Number of contexts by order
    pub contexts_by_order: HashMap<usize, usize>,
    /// Number of unique transitions by context order
    pub transitions_by_context: HashMap<usize, usize>,
}

impl ContextStatistics {
    /// Create new context statistics
    pub fn new() -> Self {
        Self {
            total_contexts: 0,
            total_transitions: 0,
            total_entropy: 0.0,
            avg_entropy: 0.0,
            avg_frequency: 0.0,
            min_frequency: usize::MAX,
            max_frequency: 0,
            min_entropy: f64::INFINITY,
            max_entropy: 0.0,
            contexts_by_order: HashMap::new(),
            transitions_by_context: HashMap::new(),
        }
    }

    /// Get memory efficiency (contexts per MB)
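    ///
    /// # Examples
    ///
    /// A minimal sketch (ignored doctest); assumes `tree` is a trained
    /// `ContextTree`.
    ///
    /// ```ignore
    /// let stats = tree.get_context_statistics();
    /// let contexts_per_mb = stats.memory_efficiency(tree.estimate_memory_usage());
    /// let transitions_per_context = stats.compression_ratio();
    /// ```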
    pub fn memory_efficiency(&self, memory_bytes: usize) -> f64 {
        if memory_bytes == 0 {
            return 0.0;
        }
        (self.total_contexts as f64) / (memory_bytes as f64 / 1_048_576.0)
    }

    /// Get compression ratio (transitions per context)
    pub fn compression_ratio(&self) -> f64 {
        if self.total_contexts == 0 {
            return 0.0;
        }
        self.total_transitions as f64 / self.total_contexts as f64
    }
}

impl Default for ContextStatistics {
    fn default() -> Self {
        Self::new()
    }
}

/// Performance optimization configuration
#[derive(Debug, Clone)]
pub struct OptimizationConfig {
    /// Enable context pruning
    pub enable_pruning: bool,
    /// Minimum count for context pruning
    pub min_context_count: usize,
    /// Minimum entropy for context pruning
    pub min_entropy: f64,
    /// Maximum number of contexts to keep
    pub max_contexts: Option<usize>,
    /// Enable performance monitoring
    pub enable_monitoring: bool,
}

impl Default for OptimizationConfig {
    fn default() -> Self {
        Self {
            enable_pruning: false,
            min_context_count: 2,
            min_entropy: 0.1,
            max_contexts: None,
            enable_monitoring: true,
        }
    }
}

impl OptimizationConfig {
    /// Create configuration for memory-constrained environments
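    ///
    /// # Examples
    ///
    /// A minimal sketch (ignored doctest) showing a preset adjusted with
    /// struct-update syntax; the 5_000-context cap is illustrative.
    ///
    /// ```ignore
    /// let config = OptimizationConfig {
    ///     max_contexts: Some(5_000),
    ///     ..OptimizationConfig::for_low_memory()
    /// };
    /// ```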
    pub fn for_low_memory() -> Self {
        Self {
            enable_pruning: true,
            min_context_count: 3,
            min_entropy: 0.2,
            max_contexts: Some(10_000),
            enable_monitoring: true,
        }
    }

    /// Create configuration for high-accuracy requirements
    pub fn for_high_accuracy() -> Self {
        Self {
            enable_pruning: false,
            min_context_count: 1,
            min_entropy: 0.0,
            max_contexts: None,
            enable_monitoring: true,
        }
    }

    /// Create configuration for balanced performance
    pub fn balanced() -> Self {
        Self {
            enable_pruning: true,
            min_context_count: 2,
            min_entropy: 0.05,
            max_contexts: Some(100_000),
            enable_monitoring: true,
        }
    }
}

/// Apply performance optimizations to a context tree
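///
/// # Examples
///
/// A minimal sketch (ignored doctest); assumes `tree` has already been
/// trained.
///
/// ```ignore
/// let config = OptimizationConfig::for_low_memory();
/// let metrics = optimize_context_tree(&mut tree, &config).expect("optimization failed");
/// println!(
///     "kept {} contexts using ~{} bytes",
///     metrics.context_count, metrics.estimated_memory_bytes
/// );
/// ```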
pub fn optimize_context_tree(
    tree: &mut ContextTree,
    config: &OptimizationConfig,
) -> AnomalyGridResult<PerformanceMetrics> {
    let start_time = Instant::now();
    let _initial_count = tree.context_count();

    if config.enable_pruning {
        // Apply frequency-based pruning
        if config.min_context_count > 1 {
            tree.prune_low_frequency_contexts(config.min_context_count);
        }

        // Apply entropy-based pruning
        if config.min_entropy > 0.0 {
            tree.prune_low_entropy_contexts(config.min_entropy);
        }

        // Apply maximum context limit
        if let Some(max_contexts) = config.max_contexts {
            tree.limit_context_count(max_contexts);
        }
    }

    let optimization_time = start_time.elapsed();

    let mut metrics = PerformanceMetrics::new();
    metrics.training_time_ms = optimization_time.as_millis() as u64;
    metrics.context_count = tree.context_count();
    metrics.estimated_memory_bytes = tree.estimate_memory_usage();

    Ok(metrics)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_performance_metrics() {
        let mut metrics = PerformanceMetrics::new();

        metrics.training_time_ms = 100; // 0.1 seconds
        metrics.detection_time_ms = 50; // 0.05 seconds

        // Test throughput calculations
        // 1000 elements / 0.1 seconds = 10,000 elements/second
        assert_eq!(metrics.training_throughput(1000), 10000.0);
        // 500 elements / 0.05 seconds = 10,000 elements/second
        assert_eq!(metrics.detection_throughput(500), 10000.0);
    }

    #[test]
    fn test_context_pruning() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // Build contexts using the proper API
        let config = crate::config::AnomalyGridConfig::default();

        // Create high frequency sequence
        let high_freq_sequence: Vec<String> = std::iter::repeat_n("X".to_string(), 5)
            .chain(std::iter::repeat_n("A".to_string(), 10))
            .collect();
        tree.build_from_sequence(&high_freq_sequence, &config)
            .expect("Failed to build");

        // Create low frequency sequence
        let low_freq_sequence = vec!["Y".to_string(), "B".to_string()];
        tree.build_from_sequence(&low_freq_sequence, &config)
            .expect("Failed to build");

        let initial_count = tree.context_count();
        assert!(initial_count > 0);

        // Prune contexts with frequency < 5
        let pruned = tree.prune_low_frequency_contexts(5);

        assert!(pruned > 0);
        assert!(tree.context_count() < initial_count);
    }

    #[test]
    fn test_memory_estimation() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // Build contexts using the proper API
        let config = crate::config::AnomalyGridConfig::default();
        let sequence = vec!["X".to_string(), "A".to_string(), "B".to_string()];
        tree.build_from_sequence(&sequence, &config)
            .expect("Failed to build");

        let memory_usage = tree.estimate_memory_usage();
        assert!(memory_usage > 0);
    }

    #[test]
    fn test_context_statistics() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // Build contexts using the proper API
        let config = crate::config::AnomalyGridConfig::default();

        // Create sequences that will generate contexts of different orders
        let sequence1 = vec!["X".to_string(), "A".to_string(), "B".to_string()];
        tree.build_from_sequence(&sequence1, &config)
            .expect("Failed to build");

        let sequence2 = vec!["Y".to_string(), "Z".to_string(), "C".to_string()];
        tree.build_from_sequence(&sequence2, &config)
            .expect("Failed to build");

        let stats = tree.get_context_statistics();

        assert!(stats.total_contexts > 0);
        // With max_order=2, we should have contexts of order 1 and 2
        assert!(stats.contexts_by_order.contains_key(&1));
        assert!(stats.contexts_by_order.contains_key(&2));
    }

    #[test]
    fn test_optimization_config() {
        let low_memory = OptimizationConfig::for_low_memory();
        assert!(low_memory.enable_pruning);
        assert!(low_memory.max_contexts.is_some());

        let high_accuracy = OptimizationConfig::for_high_accuracy();
        assert!(!high_accuracy.enable_pruning);
        assert!(high_accuracy.max_contexts.is_none());

        let balanced = OptimizationConfig::balanced();
        assert!(balanced.enable_pruning);
        assert!(balanced.max_contexts.is_some());
    }

    #[test]
    fn test_optimize_context_tree() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // Build contexts using the proper API
        let config_build = crate::config::AnomalyGridConfig::default();

        // Create sequences with different patterns to generate various contexts
        for i in 1..=5 {
            let sequence: Vec<String> = (0..i + 2).map(|j| format!("S{}", j % 3)).collect();
            tree.build_from_sequence(&sequence, &config_build)
                .expect("Failed to build");
        }

        let initial_count = tree.context_count();
        assert!(initial_count > 0);

        // Use optimization config
        let config = OptimizationConfig {
            enable_pruning: true,
            min_context_count: 2,
            min_entropy: 0.0,
            max_contexts: Some(8),
            enable_monitoring: true,
        };

        let metrics = optimize_context_tree(&mut tree, &config).expect("Failed to optimize");

        assert!(tree.context_count() <= initial_count);
        assert_eq!(metrics.context_count, tree.context_count());
        assert!(tree.context_count() > 0);
        assert!(metrics.estimated_memory_bytes > 0);
    }
}