1use crate::context_tree::ContextTree;
7use crate::error::AnomalyGridResult;
8use crate::string_interner::StateId;
9use std::collections::{HashMap, HashSet};
10use std::time::Instant;
11
12#[derive(Debug, Clone)]
14pub struct PerformanceMetrics {
15 pub training_time_ms: u64,
17 pub detection_time_ms: u64,
19 pub context_count: usize,
21 pub estimated_memory_bytes: usize,
23}
24
25impl PerformanceMetrics {
26 pub fn new() -> Self {
28 Self {
29 training_time_ms: 0,
30 detection_time_ms: 0,
31 context_count: 0,
32 estimated_memory_bytes: 0,
33 }
34 }
35
36 pub fn training_throughput(&self, sequence_length: usize) -> f64 {
38 if self.training_time_ms == 0 {
39 return 0.0;
40 }
41 (sequence_length as f64) / (self.training_time_ms as f64 / 1000.0)
42 }
43
44 pub fn detection_throughput(&self, sequence_length: usize) -> f64 {
46 if self.detection_time_ms == 0 {
47 return 0.0;
48 }
49 (sequence_length as f64) / (self.detection_time_ms as f64 / 1000.0)
50 }
51}
52
53impl Default for PerformanceMetrics {
54 fn default() -> Self {
55 Self::new()
56 }
57}
58
59impl ContextTree {
61 pub fn prune_low_frequency_contexts(&mut self, min_count: usize) -> usize {
66 if min_count == 0 {
67 return 0;
68 }
69
70 self.rebuild_filtered(|_, node| node.total_count() >= min_count)
71 }
72
73 pub fn prune_low_entropy_contexts(&mut self, min_entropy: f64) -> usize {
80 if min_entropy <= 0.0 {
81 return 0;
82 }
83
84 let cfg = self.last_config.clone();
85 self.rebuild_filtered(|_, node| node.compute_entropy(&cfg) >= min_entropy)
86 }
87
88 pub fn limit_context_count(&mut self, max_contexts: usize) -> usize {
95 if max_contexts == 0 {
96 return 0;
97 }
98
99 let original_count = self.trie().context_count();
100 if original_count <= max_contexts {
101 return 0;
102 }
103
104 let mut contexts: Vec<(Vec<StateId>, usize)> = self
106 .trie()
107 .iter_contexts()
108 .map(|(state_ids, node)| (state_ids, node.total_count()))
109 .collect();
110
111 contexts.sort_by(|a, b| b.1.cmp(&a.1));
113 let keep: HashSet<Vec<StateId>> = contexts
114 .into_iter()
115 .take(max_contexts)
116 .map(|(ids, _)| ids)
117 .collect();
118
119 self.rebuild_filtered(|state_ids, _| keep.contains(state_ids))
120 }
121
122 pub fn estimate_memory_usage(&self) -> usize {
127 self.trie().memory_usage()
128 }
129
130 pub fn get_context_statistics(&self) -> ContextStatistics {
135 let mut stats = ContextStatistics::new();
136 stats.total_contexts = self.trie().context_count();
137
138 for (state_ids, node) in self.trie().iter_contexts() {
139 let order = state_ids.len();
140 *stats.contexts_by_order.entry(order).or_insert(0) += 1;
141 stats.total_transitions += node.total_transitions();
142 }
143
144 if stats.total_contexts > 0 {
145 stats.avg_frequency = stats.total_transitions as f64 / stats.total_contexts as f64;
146 }
147
148 stats
149 }
150}
151
152#[derive(Debug, Clone)]
154pub struct ContextStatistics {
155 pub total_contexts: usize,
157 pub total_transitions: usize,
159 pub total_entropy: f64,
161 pub avg_entropy: f64,
163 pub avg_frequency: f64,
165 pub min_frequency: usize,
167 pub max_frequency: usize,
169 pub min_entropy: f64,
171 pub max_entropy: f64,
173 pub contexts_by_order: HashMap<usize, usize>,
175 pub transitions_by_context: HashMap<usize, usize>,
177}
178
179impl ContextStatistics {
180 pub fn new() -> Self {
182 Self {
183 total_contexts: 0,
184 total_transitions: 0,
185 total_entropy: 0.0,
186 avg_entropy: 0.0,
187 avg_frequency: 0.0,
188 min_frequency: usize::MAX,
189 max_frequency: 0,
190 min_entropy: f64::INFINITY,
191 max_entropy: 0.0,
192 contexts_by_order: HashMap::new(),
193 transitions_by_context: HashMap::new(),
194 }
195 }
196
197 pub fn memory_efficiency(&self, memory_bytes: usize) -> f64 {
199 if memory_bytes == 0 {
200 return 0.0;
201 }
202 (self.total_contexts as f64) / (memory_bytes as f64 / 1_048_576.0)
203 }
204
205 pub fn compression_ratio(&self) -> f64 {
207 if self.total_contexts == 0 {
208 return 0.0;
209 }
210 self.total_transitions as f64 / self.total_contexts as f64
211 }
212}
213
214impl Default for ContextStatistics {
215 fn default() -> Self {
216 Self::new()
217 }
218}
219
220#[derive(Debug, Clone)]
222pub struct OptimizationConfig {
223 pub enable_pruning: bool,
225 pub min_context_count: usize,
227 pub min_entropy: f64,
229 pub max_contexts: Option<usize>,
231 pub enable_monitoring: bool,
233}
234
235impl Default for OptimizationConfig {
236 fn default() -> Self {
237 Self {
238 enable_pruning: false,
239 min_context_count: 2,
240 min_entropy: 0.1,
241 max_contexts: None,
242 enable_monitoring: true,
243 }
244 }
245}
246
247impl OptimizationConfig {
248 pub fn for_low_memory() -> Self {
250 Self {
251 enable_pruning: true,
252 min_context_count: 3,
253 min_entropy: 0.2,
254 max_contexts: Some(10_000),
255 enable_monitoring: true,
256 }
257 }
258
259 pub fn for_high_accuracy() -> Self {
261 Self {
262 enable_pruning: false,
263 min_context_count: 1,
264 min_entropy: 0.0,
265 max_contexts: None,
266 enable_monitoring: true,
267 }
268 }
269
270 pub fn balanced() -> Self {
272 Self {
273 enable_pruning: true,
274 min_context_count: 2,
275 min_entropy: 0.05,
276 max_contexts: Some(100_000),
277 enable_monitoring: true,
278 }
279 }
280}
281
282pub fn optimize_context_tree(
284 tree: &mut ContextTree,
285 config: &OptimizationConfig,
286) -> AnomalyGridResult<PerformanceMetrics> {
287 let start_time = Instant::now();
288 let _initial_count = tree.context_count();
289
290 if config.enable_pruning {
291 if config.min_context_count > 1 {
293 tree.prune_low_frequency_contexts(config.min_context_count);
294 }
295
296 if config.min_entropy > 0.0 {
298 tree.prune_low_entropy_contexts(config.min_entropy);
299 }
300
301 if let Some(_max_contexts) = config.max_contexts {
303 tree.limit_context_count(_max_contexts);
305 }
306 }
307
308 let optimization_time = start_time.elapsed();
309
310 let mut metrics = PerformanceMetrics::new();
311 metrics.training_time_ms = optimization_time.as_millis() as u64;
312 metrics.context_count = tree.context_count();
313 metrics.estimated_memory_bytes = tree.estimate_memory_usage();
314
315 Ok(metrics)
316}
317
318#[cfg(test)]
319mod tests {
320 use super::*;
321
322 #[test]
323 fn test_performance_metrics() {
324 let mut metrics = PerformanceMetrics::new();
325
326 metrics.training_time_ms = 100; metrics.detection_time_ms = 50; assert_eq!(metrics.training_throughput(1000), 10000.0);
332 assert_eq!(metrics.detection_throughput(500), 10000.0);
334 }
335
336 #[test]
337 fn test_context_pruning() {
338 let mut tree = ContextTree::new(2).expect("Failed to create tree");
339
340 let config = crate::config::AnomalyGridConfig::default();
342
343 let high_freq_sequence: Vec<String> = std::iter::repeat_n("X".to_string(), 5)
345 .chain(std::iter::repeat_n("A".to_string(), 10))
346 .collect();
347 tree.build_from_sequence(&high_freq_sequence, &config)
348 .expect("Failed to build");
349
350 let low_freq_sequence = vec!["Y".to_string(), "B".to_string()];
352 tree.build_from_sequence(&low_freq_sequence, &config)
353 .expect("Failed to build");
354
355 let initial_count = tree.context_count();
356 assert!(initial_count > 0);
357
358 let pruned = tree.prune_low_frequency_contexts(5);
360
361 assert!(pruned > 0);
362 assert!(tree.context_count() < initial_count);
363 }
364
365 #[test]
366 fn test_memory_estimation() {
367 let mut tree = ContextTree::new(2).expect("Failed to create tree");
368
369 let config = crate::config::AnomalyGridConfig::default();
371 let sequence = vec!["X".to_string(), "A".to_string(), "B".to_string()];
372 tree.build_from_sequence(&sequence, &config)
373 .expect("Failed to build");
374
375 let memory_usage = tree.estimate_memory_usage();
376 assert!(memory_usage > 0);
377 }
378
379 #[test]
380 fn test_context_statistics() {
381 let mut tree = ContextTree::new(2).expect("Failed to create tree");
382
383 let config = crate::config::AnomalyGridConfig::default();
385
386 let sequence1 = vec!["X".to_string(), "A".to_string(), "B".to_string()];
388 tree.build_from_sequence(&sequence1, &config)
389 .expect("Failed to build");
390
391 let sequence2 = vec!["Y".to_string(), "Z".to_string(), "C".to_string()];
392 tree.build_from_sequence(&sequence2, &config)
393 .expect("Failed to build");
394
395 let stats = tree.get_context_statistics();
396
397 assert!(stats.total_contexts > 0);
398 assert!(stats.contexts_by_order.contains_key(&1));
400 assert!(stats.contexts_by_order.contains_key(&2));
401 }
402
403 #[test]
404 fn test_optimization_config() {
405 let low_memory = OptimizationConfig::for_low_memory();
406 assert!(low_memory.enable_pruning);
407 assert!(low_memory.max_contexts.is_some());
408
409 let high_accuracy = OptimizationConfig::for_high_accuracy();
410 assert!(!high_accuracy.enable_pruning);
411 assert!(high_accuracy.max_contexts.is_none());
412
413 let balanced = OptimizationConfig::balanced();
414 assert!(balanced.enable_pruning);
415 assert!(balanced.max_contexts.is_some());
416 }
417
418 #[test]
419 fn test_optimize_context_tree() {
420 let mut tree = ContextTree::new(2).expect("Failed to create tree");
421
422 let config_build = crate::config::AnomalyGridConfig::default();
424
425 for i in 1..=5 {
427 let sequence: Vec<String> = (0..i + 2).map(|j| format!("S{}", j % 3)).collect();
428 tree.build_from_sequence(&sequence, &config_build)
429 .expect("Failed to build");
430 }
431
432 let initial_count = tree.context_count();
433 assert!(initial_count > 0);
434
435 let config = OptimizationConfig {
437 enable_pruning: true,
438 min_context_count: 2,
439 min_entropy: 0.0,
440 max_contexts: Some(8),
441 enable_monitoring: true,
442 };
443
444 let metrics = optimize_context_tree(&mut tree, &config).expect("Failed to optimize");
445
446 assert!(tree.context_count() <= initial_count);
447 assert_eq!(metrics.context_count, tree.context_count());
448 assert!(tree.context_count() > 0);
449 assert!(metrics.estimated_memory_bytes > 0);
450 }
451}