anomaly_grid/context_tree/
mod.rs1use crate::config::AnomalyGridConfig;
7use crate::error::{AnomalyGridError, AnomalyGridResult};
8use std::collections::HashMap;
9
10#[derive(Debug, Clone)]
12pub struct ContextNode {
13 pub counts: HashMap<String, usize>,
15 pub probabilities: HashMap<String, f64>,
17 pub entropy: f64,
19 pub kl_divergence: f64,
21}
22
23impl ContextNode {
24 pub fn new() -> Self {
26 Self {
27 counts: HashMap::new(),
28 probabilities: HashMap::new(),
29 entropy: 0.0,
30 kl_divergence: 0.0,
31 }
32 }
33
34 pub fn add_transition(&mut self, next_state: String) {
36 *self.counts.entry(next_state).or_insert(0) += 1;
37 }
38
39 pub fn calculate_probabilities(&mut self, config: &AnomalyGridConfig) {
41 if self.counts.is_empty() {
42 return;
43 }
44
45 let total_count: usize = self.counts.values().sum();
46 let vocab_size = self.counts.len();
47
48 self.probabilities.clear();
50
51 for (state, &count) in &self.counts {
52 let smoothed_prob = (count as f64 + config.smoothing_alpha)
53 / (total_count as f64 + config.smoothing_alpha * vocab_size as f64);
54 self.probabilities.insert(state.clone(), smoothed_prob);
55 }
56
57 self.entropy = self
60 .probabilities
61 .values()
62 .map(|&p| if p > 0.0 { -p * p.log2() } else { 0.0 })
63 .sum();
64
65 let uniform_prob = 1.0 / vocab_size as f64;
67 self.kl_divergence = self
68 .probabilities
69 .values()
70 .map(|&p| {
71 if p > 0.0 {
72 p * (p / uniform_prob).log2()
73 } else {
74 0.0
75 }
76 })
77 .sum();
78 }
79}
80
81impl Default for ContextNode {
82 fn default() -> Self {
83 Self::new()
84 }
85}
86
87#[derive(Debug, Clone)]
89pub struct ContextTree {
90 pub contexts: HashMap<Vec<String>, ContextNode>,
92 pub max_order: usize,
94}
95
96impl ContextTree {
97 pub fn new(max_order: usize) -> AnomalyGridResult<Self> {
99 if max_order == 0 {
100 return Err(AnomalyGridError::invalid_max_order(max_order));
101 }
102
103 Ok(Self {
104 contexts: HashMap::new(),
105 max_order,
106 })
107 }
108
109 pub fn build_from_sequence(
119 &mut self,
120 sequence: &[String],
121 config: &AnomalyGridConfig,
122 ) -> AnomalyGridResult<()> {
123 if sequence.len() < config.min_sequence_length {
125 return Err(AnomalyGridError::sequence_too_short(
126 config.min_sequence_length,
127 sequence.len(),
128 "context tree building",
129 ));
130 }
131
132 for window_size in 1..=self.max_order {
134 for window in sequence.windows(window_size + 1) {
135 if let Some(limit) = config.memory_limit {
137 if self.contexts.len() >= limit {
138 return Err(AnomalyGridError::memory_limit_exceeded(
139 self.contexts.len(),
140 limit,
141 ));
142 }
143 }
144
145 let context = window[..window_size].to_vec();
146 let next_state = &window[window_size];
147
148 let node = self.contexts.entry(context).or_default();
149 node.add_transition(next_state.clone());
150 }
151 }
152
153 for node in self.contexts.values_mut() {
155 node.calculate_probabilities(config);
156 }
157
158 Ok(())
159 }
160
161 pub fn get_transition_probability(&self, context: &[String], next_state: &str) -> Option<f64> {
163 self.contexts
164 .get(context)
165 .and_then(|node| node.probabilities.get(next_state))
166 .copied()
167 }
168
169 pub fn get_context_node(&self, context: &[String]) -> Option<&ContextNode> {
171 self.contexts.get(context)
172 }
173
174 pub fn get_contexts_of_order(&self, order: usize) -> Vec<&Vec<String>> {
176 self.contexts
177 .keys()
178 .filter(|context| context.len() == order)
179 .collect()
180 }
181
182 pub fn context_count(&self) -> usize {
184 self.contexts.len()
185 }
186}