1use crate::context_tree::ContextTree;
7use crate::error::AnomalyGridResult;
8use std::collections::HashMap;
9use std::time::Instant;
10
/// Timing and memory metrics collected around training / detection runs.
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    /// Wall-clock time spent training (or optimizing — see
    /// `optimize_context_tree`), in milliseconds.
    pub training_time_ms: u64,
    /// Wall-clock time spent in detection, in milliseconds.
    pub detection_time_ms: u64,
    /// Number of contexts stored in the tree when the metrics were taken.
    pub context_count: usize,
    /// Rough lower-bound estimate of the tree's heap usage, in bytes.
    pub estimated_memory_bytes: usize,
}
23
24impl PerformanceMetrics {
25 pub fn new() -> Self {
27 Self {
28 training_time_ms: 0,
29 detection_time_ms: 0,
30 context_count: 0,
31 estimated_memory_bytes: 0,
32 }
33 }
34
35 pub fn training_throughput(&self, sequence_length: usize) -> f64 {
37 if self.training_time_ms == 0 {
38 return 0.0;
39 }
40 (sequence_length as f64) / (self.training_time_ms as f64 / 1000.0)
41 }
42
43 pub fn detection_throughput(&self, sequence_length: usize) -> f64 {
45 if self.detection_time_ms == 0 {
46 return 0.0;
47 }
48 (sequence_length as f64) / (self.detection_time_ms as f64 / 1000.0)
49 }
50}
51
impl Default for PerformanceMetrics {
    /// Equivalent to [`PerformanceMetrics::new`]: all counters start at zero.
    fn default() -> Self {
        Self::new()
    }
}
57
58impl ContextTree {
60 pub fn prune_low_frequency_contexts(&mut self, min_count: usize) -> usize {
65 let initial_count = self.contexts.len();
66
67 self.contexts.retain(|_, node| {
68 let total_transitions: usize = node.counts.values().sum();
69 total_transitions >= min_count
70 });
71
72 initial_count - self.contexts.len()
73 }
74
75 pub fn prune_low_entropy_contexts(&mut self, min_entropy: f64) -> usize {
80 let initial_count = self.contexts.len();
81
82 self.contexts.retain(|_, node| node.entropy >= min_entropy);
83
84 initial_count - self.contexts.len()
85 }
86
87 pub fn keep_top_contexts(&mut self, max_contexts: usize) -> usize {
92 if self.contexts.len() <= max_contexts {
93 return 0;
94 }
95
96 let initial_count = self.contexts.len();
97
98 let mut context_frequencies: Vec<_> = self
100 .contexts
101 .iter()
102 .map(|(context, node)| {
103 let freq: usize = node.counts.values().sum();
104 (context.clone(), freq)
105 })
106 .collect();
107
108 context_frequencies.sort_by(|a, b| b.1.cmp(&a.1));
109
110 let contexts_to_keep: std::collections::HashSet<_> = context_frequencies
112 .into_iter()
113 .take(max_contexts)
114 .map(|(context, _)| context)
115 .collect();
116
117 self.contexts
118 .retain(|context, _| contexts_to_keep.contains(context));
119
120 initial_count - self.contexts.len()
121 }
122
123 pub fn estimate_memory_usage(&self) -> usize {
125 let mut total_bytes = 0;
126
127 for (context, node) in &self.contexts {
128 total_bytes += context.len() * std::mem::size_of::<String>();
130 total_bytes += context.iter().map(|s| s.capacity()).sum::<usize>();
131
132 total_bytes +=
134 node.counts.len() * (std::mem::size_of::<String>() + std::mem::size_of::<usize>());
135 total_bytes += node.counts.keys().map(|s| s.capacity()).sum::<usize>();
136
137 total_bytes += node.probabilities.len()
139 * (std::mem::size_of::<String>() + std::mem::size_of::<f64>());
140 total_bytes += node
141 .probabilities
142 .keys()
143 .map(|s| s.capacity())
144 .sum::<usize>();
145
146 total_bytes += 2 * std::mem::size_of::<f64>();
148 }
149
150 total_bytes
151 }
152
153 pub fn get_context_statistics(&self) -> ContextStatistics {
155 let mut stats = ContextStatistics::new();
156
157 for (context, node) in &self.contexts {
158 let total_count: usize = node.counts.values().sum();
159 let unique_transitions = node.counts.len();
160
161 stats.total_contexts += 1;
162 stats.total_transitions += total_count;
163 stats.total_entropy += node.entropy;
164
165 if total_count < stats.min_frequency {
166 stats.min_frequency = total_count;
167 }
168 if total_count > stats.max_frequency {
169 stats.max_frequency = total_count;
170 }
171
172 if node.entropy < stats.min_entropy {
173 stats.min_entropy = node.entropy;
174 }
175 if node.entropy > stats.max_entropy {
176 stats.max_entropy = node.entropy;
177 }
178
179 stats
180 .contexts_by_order
181 .entry(context.len())
182 .and_modify(|count| *count += 1)
183 .or_insert(1);
184
185 stats
186 .transitions_by_context
187 .entry(context.len())
188 .and_modify(|count| *count += unique_transitions)
189 .or_insert(unique_transitions);
190 }
191
192 if stats.total_contexts > 0 {
193 stats.avg_entropy = stats.total_entropy / stats.total_contexts as f64;
194 stats.avg_frequency = stats.total_transitions as f64 / stats.total_contexts as f64;
195 }
196
197 stats
198 }
199}
200
/// Aggregate statistics over all contexts in a `ContextTree`, as produced by
/// `ContextTree::get_context_statistics`.
#[derive(Debug, Clone)]
pub struct ContextStatistics {
    /// Total number of contexts examined.
    pub total_contexts: usize,
    /// Sum of every context's transition counts.
    pub total_transitions: usize,
    /// Sum of every context's entropy.
    pub total_entropy: f64,
    /// Mean entropy per context (0.0 when there are no contexts).
    pub avg_entropy: f64,
    /// Mean transition count per context (0.0 when there are no contexts).
    pub avg_frequency: f64,
    /// Smallest per-context transition count seen (usize::MAX when empty).
    pub min_frequency: usize,
    /// Largest per-context transition count seen (0 when empty).
    pub max_frequency: usize,
    /// Smallest per-context entropy seen (+inf when empty).
    pub min_entropy: f64,
    /// Largest per-context entropy seen (0.0 when empty).
    pub max_entropy: f64,
    /// Context order (key length) -> number of contexts of that order.
    pub contexts_by_order: HashMap<usize, usize>,
    /// Context order (key length) -> total unique transitions at that order.
    pub transitions_by_context: HashMap<usize, usize>,
}
227
228impl ContextStatistics {
229 pub fn new() -> Self {
231 Self {
232 total_contexts: 0,
233 total_transitions: 0,
234 total_entropy: 0.0,
235 avg_entropy: 0.0,
236 avg_frequency: 0.0,
237 min_frequency: usize::MAX,
238 max_frequency: 0,
239 min_entropy: f64::INFINITY,
240 max_entropy: 0.0,
241 contexts_by_order: HashMap::new(),
242 transitions_by_context: HashMap::new(),
243 }
244 }
245
246 pub fn memory_efficiency(&self, memory_bytes: usize) -> f64 {
248 if memory_bytes == 0 {
249 return 0.0;
250 }
251 (self.total_contexts as f64) / (memory_bytes as f64 / 1_048_576.0)
252 }
253
254 pub fn compression_ratio(&self) -> f64 {
256 if self.total_contexts == 0 {
257 return 0.0;
258 }
259 self.total_transitions as f64 / self.total_contexts as f64
260 }
261}
262
impl Default for ContextStatistics {
    /// Equivalent to [`ContextStatistics::new`] (empty accumulator with
    /// min trackers seeded at their extremes).
    fn default() -> Self {
        Self::new()
    }
}
268
/// Knobs controlling how `optimize_context_tree` prunes a `ContextTree`.
#[derive(Debug, Clone)]
pub struct OptimizationConfig {
    /// Master switch: when false, no pruning of any kind is performed.
    pub enable_pruning: bool,
    /// Contexts seen fewer than this many times are pruned (only applied
    /// when > 1).
    pub min_context_count: usize,
    /// Contexts with entropy below this value are pruned (only applied
    /// when > 0.0).
    pub min_entropy: f64,
    /// Hard cap on the number of contexts kept; `None` means unlimited.
    pub max_contexts: Option<usize>,
    /// Whether performance monitoring is enabled. NOTE(review): not read by
    /// `optimize_context_tree` itself — presumably consumed elsewhere.
    pub enable_monitoring: bool,
}
283
impl Default for OptimizationConfig {
    /// Conservative defaults: pruning disabled, monitoring enabled.
    /// The threshold fields (count 2, entropy 0.1) only take effect if
    /// `enable_pruning` is later switched on.
    fn default() -> Self {
        Self {
            enable_pruning: false,
            min_context_count: 2,
            min_entropy: 0.1,
            max_contexts: None,
            enable_monitoring: true,
        }
    }
}
295
296impl OptimizationConfig {
297 pub fn for_low_memory() -> Self {
299 Self {
300 enable_pruning: true,
301 min_context_count: 3,
302 min_entropy: 0.2,
303 max_contexts: Some(10_000),
304 enable_monitoring: true,
305 }
306 }
307
308 pub fn for_high_accuracy() -> Self {
310 Self {
311 enable_pruning: false,
312 min_context_count: 1,
313 min_entropy: 0.0,
314 max_contexts: None,
315 enable_monitoring: true,
316 }
317 }
318
319 pub fn balanced() -> Self {
321 Self {
322 enable_pruning: true,
323 min_context_count: 2,
324 min_entropy: 0.05,
325 max_contexts: Some(100_000),
326 enable_monitoring: true,
327 }
328 }
329}
330
331pub fn optimize_context_tree(
333 tree: &mut ContextTree,
334 config: &OptimizationConfig,
335) -> AnomalyGridResult<PerformanceMetrics> {
336 let start_time = Instant::now();
337 let _initial_count = tree.context_count();
338
339 if config.enable_pruning {
340 if config.min_context_count > 1 {
342 tree.prune_low_frequency_contexts(config.min_context_count);
343 }
344
345 if config.min_entropy > 0.0 {
347 tree.prune_low_entropy_contexts(config.min_entropy);
348 }
349
350 if let Some(max_contexts) = config.max_contexts {
352 tree.keep_top_contexts(max_contexts);
353 }
354 }
355
356 let optimization_time = start_time.elapsed();
357
358 let mut metrics = PerformanceMetrics::new();
359 metrics.training_time_ms = optimization_time.as_millis() as u64;
360 metrics.context_count = tree.context_count();
361 metrics.estimated_memory_bytes = tree.estimate_memory_usage();
362
363 Ok(metrics)
364}
365
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context_tree::ContextNode;

    #[test]
    fn test_performance_metrics() {
        let mut metrics = PerformanceMetrics::new();
        metrics.training_time_ms = 100;
        metrics.detection_time_ms = 50;

        // 1000 symbols in 0.1 s and 500 symbols in 0.05 s are both 10k/s.
        assert_eq!(metrics.training_throughput(1000), 10000.0);
        assert_eq!(metrics.detection_throughput(500), 10000.0);
    }

    #[test]
    fn test_context_pruning() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // One frequently-seen context (10 transitions)...
        let mut frequent = ContextNode::new();
        for _ in 0..10 {
            frequent.add_transition("A".to_string());
        }
        tree.contexts.insert(vec!["X".to_string()], frequent);

        // ...and one rarely-seen context (1 transition).
        let mut rare = ContextNode::new();
        rare.add_transition("B".to_string());
        tree.contexts.insert(vec!["Y".to_string()], rare);

        assert_eq!(tree.context_count(), 2);

        // Pruning at a threshold of 5 should drop only the rare context.
        let pruned = tree.prune_low_frequency_contexts(5);
        assert_eq!(pruned, 1);
        assert_eq!(tree.context_count(), 1);
    }

    #[test]
    fn test_memory_estimation() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        let mut node = ContextNode::new();
        node.add_transition("A".to_string());
        node.add_transition("B".to_string());
        tree.contexts.insert(vec!["X".to_string()], node);

        // Any non-empty tree must report a positive estimate.
        assert!(tree.estimate_memory_usage() > 0);
    }

    #[test]
    fn test_context_statistics() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // Order-1 context with two transitions.
        let mut order_one = ContextNode::new();
        order_one.add_transition("A".to_string());
        order_one.add_transition("B".to_string());
        tree.contexts.insert(vec!["X".to_string()], order_one);

        // Order-2 context with one transition.
        let mut order_two = ContextNode::new();
        order_two.add_transition("C".to_string());
        tree.contexts
            .insert(vec!["Y".to_string(), "Z".to_string()], order_two);

        let stats = tree.get_context_statistics();

        assert_eq!(stats.total_contexts, 2);
        assert_eq!(stats.contexts_by_order.get(&1), Some(&1));
        assert_eq!(stats.contexts_by_order.get(&2), Some(&1));
    }

    #[test]
    fn test_optimization_config() {
        let low_memory = OptimizationConfig::for_low_memory();
        assert!(low_memory.enable_pruning);
        assert!(low_memory.max_contexts.is_some());

        let high_accuracy = OptimizationConfig::for_high_accuracy();
        assert!(!high_accuracy.enable_pruning);
        assert!(high_accuracy.max_contexts.is_none());

        let balanced = OptimizationConfig::balanced();
        assert!(balanced.enable_pruning);
        assert!(balanced.max_contexts.is_some());
    }

    #[test]
    fn test_optimize_context_tree() {
        let mut tree = ContextTree::new(2).expect("Failed to create tree");

        // Ten contexts with transition frequencies 1..=10.
        for i in 1..=10 {
            let mut node = ContextNode::new();
            for _ in 0..i {
                node.add_transition("A".to_string());
            }
            let config = crate::config::AnomalyGridConfig::default();
            node.calculate_probabilities(&config);
            tree.contexts.insert(vec![format!("X{}", i)], node);
        }

        let initial_count = tree.context_count();

        let config = OptimizationConfig {
            enable_pruning: true,
            min_context_count: 2,
            min_entropy: 0.0,
            max_contexts: Some(8),
            enable_monitoring: true,
        };

        let metrics = optimize_context_tree(&mut tree, &config).expect("Failed to optimize");

        // Optimization never grows the tree, empties it here, or disagrees
        // with the metrics it reports.
        assert!(tree.context_count() <= initial_count);
        assert_eq!(metrics.context_count, tree.context_count());
        assert!(tree.context_count() > 0);
        assert!(metrics.estimated_memory_bytes < usize::MAX);
    }
}