anomaly_grid/markov_model/
mod.rs1use crate::config::AnomalyGridConfig;
7use crate::context_tree::ContextTree;
8use crate::error::{AnomalyGridError, AnomalyGridResult};
9use std::collections::HashMap;
10
11#[derive(Debug, Clone)]
13pub struct MarkovModel {
14 context_tree: ContextTree,
16 state_mapping: HashMap<String, usize>,
18 id_to_state: Vec<String>,
20 config: AnomalyGridConfig,
22}
23
24impl MarkovModel {
25 pub fn new(max_order: usize) -> AnomalyGridResult<Self> {
27 if max_order == 0 {
28 return Err(AnomalyGridError::invalid_max_order(max_order));
29 }
30
31 let config = AnomalyGridConfig::default().with_max_order(max_order)?;
32
33 Ok(Self {
34 context_tree: ContextTree::new(max_order)?,
35 state_mapping: HashMap::new(),
36 id_to_state: Vec::new(),
37 config,
38 })
39 }
40
41 pub fn with_config(config: AnomalyGridConfig) -> AnomalyGridResult<Self> {
43 config.validate()?;
44
45 Ok(Self {
46 context_tree: ContextTree::new(config.max_order)?,
47 state_mapping: HashMap::new(),
48 id_to_state: Vec::new(),
49 config,
50 })
51 }
52
53 pub fn train(&mut self, sequence: &[String]) -> AnomalyGridResult<()> {
63 if sequence.len() < self.config.min_sequence_length {
65 return Err(AnomalyGridError::sequence_too_short(
66 self.config.min_sequence_length,
67 sequence.len(),
68 "model training",
69 ));
70 }
71
72 self.build_state_mapping(sequence);
74
75 self.context_tree
77 .build_from_sequence(sequence, &self.config)
78 }
79
80 pub fn calculate_likelihood(&self, sequence: &[String]) -> f64 {
82 if sequence.len() < 2 {
83 return 1.0; }
85
86 let mut likelihood = 1.0;
87
88 for i in 1..sequence.len() {
89 let prob = self.get_best_context_probability_for_position(sequence, i);
90 likelihood *= prob;
91 }
92
93 likelihood
94 }
95
96 pub fn get_best_context_probability(&self, context: &[String], next_state: &str) -> f64 {
98 for context_len in (1..=context.len().min(self.context_tree.max_order)).rev() {
100 let sub_context = &context[context.len() - context_len..];
101
102 if let Some(prob) = self
103 .context_tree
104 .get_transition_probability(sub_context, next_state)
105 {
106 return prob;
107 }
108 }
109
110 self.get_background_probability(next_state)
112 }
113
114 pub fn max_order(&self) -> usize {
116 self.config.max_order
117 }
118
119 pub fn config(&self) -> &AnomalyGridConfig {
121 &self.config
122 }
123
124 pub fn state_mapping(&self) -> &HashMap<String, usize> {
126 &self.state_mapping
127 }
128
129 pub fn context_tree(&self) -> &ContextTree {
131 &self.context_tree
132 }
133
134 pub fn context_tree_mut(&mut self) -> &mut ContextTree {
136 &mut self.context_tree
137 }
138
139 fn build_state_mapping(&mut self, sequence: &[String]) {
141 let mut unique_states: std::collections::HashSet<String> =
142 sequence.iter().cloned().collect();
143
144 self.state_mapping.clear();
145 self.id_to_state.clear();
146
147 for (id, state) in unique_states.drain().enumerate() {
148 self.state_mapping.insert(state.clone(), id);
149 self.id_to_state.push(state);
150 }
151 }
152
153 fn get_best_context_probability_for_position(
155 &self,
156 sequence: &[String],
157 position: usize,
158 ) -> f64 {
159 let next_state = &sequence[position];
160 let max_context_len = position.min(self.context_tree.max_order);
161
162 for context_len in (1..=max_context_len).rev() {
164 let context = &sequence[position - context_len..position];
165
166 if let Some(prob) = self
167 .context_tree
168 .get_transition_probability(context, next_state)
169 {
170 return prob;
171 }
172 }
173
174 self.get_background_probability(next_state)
176 }
177
178 fn get_background_probability(&self, state: &str) -> f64 {
180 if self.state_mapping.contains_key(state) {
182 let vocab_size = self.state_mapping.len() as f64;
183 (1.0 / (vocab_size + 1.0)).max(self.config.min_probability)
184 } else {
185 self.config.min_probability
187 }
188 }
189}