anomaly_grid/
config.rs

1//! Configuration management for Anomaly Grid
2//!
3//! This module provides centralized configuration for all library parameters,
4//! enabling fine-tuning of algorithm behavior and performance characteristics.
5
6use crate::error::{AnomalyGridError, AnomalyGridResult};
7
/// Tunable parameters shared by the Anomaly Grid components.
///
/// The constraints listed on each field are the ones enforced by
/// [`AnomalyGridConfig::validate`]; constructing the struct directly does not
/// check them.
#[derive(Debug, Clone, PartialEq)]
pub struct AnomalyGridConfig {
    /// Highest context order the Markov model may use. Must be greater than zero.
    pub max_order: usize,

    /// Alpha used for Laplace smoothing. Must be finite and strictly positive.
    pub smoothing_alpha: f64,

    /// Upper bound on the number of stored contexts.
    ///
    /// `None` means unlimited; `Some(0)` is rejected by validation.
    pub memory_limit: Option<usize>,

    /// Probability floor used for numerical stability.
    ///
    /// Must lie strictly between 0 and 1.
    pub min_probability: f64,

    /// Share of the anomaly strength contributed by the likelihood component.
    ///
    /// Must be finite and non-negative, and must sum to 1.0 together with
    /// `information_weight`.
    pub likelihood_weight: f64,

    /// Share of the anomaly strength contributed by the information component.
    ///
    /// Must be finite and non-negative, and must sum to 1.0 together with
    /// `likelihood_weight`.
    pub information_weight: f64,

    /// Scaling factor applied for the tanh normalization of anomaly strength.
    ///
    /// Must be finite and strictly positive.
    pub normalization_factor: f64,

    /// Shortest sequence accepted for training. Must be at least 2.
    pub min_sequence_length: usize,
}
35
36impl Default for AnomalyGridConfig {
37    fn default() -> Self {
38        Self {
39            max_order: 3,
40            smoothing_alpha: 1.0,
41            memory_limit: Some(1_000_000),
42            min_probability: 1e-12,
43            likelihood_weight: 0.7,
44            information_weight: 0.3,
45            normalization_factor: 10.0,
46            min_sequence_length: 2,
47        }
48    }
49}
50
51impl AnomalyGridConfig {
52    /// Create a new configuration with default values
53    pub fn new() -> Self {
54        Self::default()
55    }
56
57    /// Create a configuration optimized for small alphabets (≤ 10 states)
58    pub fn for_small_alphabet() -> Self {
59        Self {
60            max_order: 4,
61            memory_limit: Some(100_000),
62            ..Self::default()
63        }
64    }
65
66    /// Create a configuration optimized for large alphabets (> 20 states)
67    pub fn for_large_alphabet() -> Self {
68        Self {
69            max_order: 2,
70            memory_limit: Some(50_000),
71            smoothing_alpha: 0.5, // Less aggressive smoothing
72            ..Self::default()
73        }
74    }
75
76    /// Create a configuration optimized for memory-constrained environments
77    pub fn for_low_memory() -> Self {
78        Self {
79            max_order: 2,
80            memory_limit: Some(10_000),
81            ..Self::default()
82        }
83    }
84
85    /// Create a configuration optimized for high accuracy
86    pub fn for_high_accuracy() -> Self {
87        Self {
88            max_order: 5,
89            smoothing_alpha: 0.1, // Minimal smoothing
90            memory_limit: Some(5_000_000),
91            ..Self::default()
92        }
93    }
94
95    /// Validate the configuration parameters
96    pub fn validate(&self) -> AnomalyGridResult<()> {
97        // Validate max_order
98        if self.max_order == 0 {
99            return Err(AnomalyGridError::invalid_configuration(
100                "max_order",
101                &self.max_order.to_string(),
102                "a positive integer > 0",
103            ));
104        }
105
106        // Validate smoothing_alpha
107        if !self.smoothing_alpha.is_finite() || self.smoothing_alpha <= 0.0 {
108            return Err(AnomalyGridError::invalid_configuration(
109                "smoothing_alpha",
110                &self.smoothing_alpha.to_string(),
111                "a positive finite number",
112            ));
113        }
114
115        // Validate min_probability
116        if !self.min_probability.is_finite()
117            || self.min_probability <= 0.0
118            || self.min_probability >= 1.0
119        {
120            return Err(AnomalyGridError::invalid_configuration(
121                "min_probability",
122                &self.min_probability.to_string(),
123                "a value in (0, 1)",
124            ));
125        }
126
127        // Validate weights
128        if !self.likelihood_weight.is_finite() || self.likelihood_weight < 0.0 {
129            return Err(AnomalyGridError::invalid_configuration(
130                "likelihood_weight",
131                &self.likelihood_weight.to_string(),
132                "a non-negative finite number",
133            ));
134        }
135
136        if !self.information_weight.is_finite() || self.information_weight < 0.0 {
137            return Err(AnomalyGridError::invalid_configuration(
138                "information_weight",
139                &self.information_weight.to_string(),
140                "a non-negative finite number",
141            ));
142        }
143
144        // Validate weight sum
145        let weight_sum = self.likelihood_weight + self.information_weight;
146        if (weight_sum - 1.0).abs() > 1e-10 {
147            return Err(AnomalyGridError::invalid_configuration(
148                "weight_sum",
149                &weight_sum.to_string(),
150                "likelihood_weight + information_weight = 1.0",
151            ));
152        }
153
154        // Validate normalization_factor
155        if !self.normalization_factor.is_finite() || self.normalization_factor <= 0.0 {
156            return Err(AnomalyGridError::invalid_configuration(
157                "normalization_factor",
158                &self.normalization_factor.to_string(),
159                "a positive finite number",
160            ));
161        }
162
163        // Validate min_sequence_length
164        if self.min_sequence_length < 2 {
165            return Err(AnomalyGridError::invalid_configuration(
166                "min_sequence_length",
167                &self.min_sequence_length.to_string(),
168                "at least 2",
169            ));
170        }
171
172        // Validate memory_limit if set
173        if let Some(limit) = self.memory_limit {
174            if limit == 0 {
175                return Err(AnomalyGridError::invalid_configuration(
176                    "memory_limit",
177                    "0",
178                    "None (unlimited) or a positive integer",
179                ));
180            }
181        }
182
183        Ok(())
184    }
185
186    /// Set max_order with validation
187    pub fn with_max_order(mut self, max_order: usize) -> AnomalyGridResult<Self> {
188        if max_order == 0 {
189            return Err(AnomalyGridError::invalid_max_order(max_order));
190        }
191        self.max_order = max_order;
192        Ok(self)
193    }
194
195    /// Set smoothing_alpha with validation
196    pub fn with_smoothing_alpha(mut self, alpha: f64) -> AnomalyGridResult<Self> {
197        if !alpha.is_finite() || alpha <= 0.0 {
198            return Err(AnomalyGridError::invalid_configuration(
199                "smoothing_alpha",
200                &alpha.to_string(),
201                "a positive finite number",
202            ));
203        }
204        self.smoothing_alpha = alpha;
205        Ok(self)
206    }
207
208    /// Set memory_limit with validation
209    pub fn with_memory_limit(mut self, limit: Option<usize>) -> AnomalyGridResult<Self> {
210        if let Some(limit_val) = limit {
211            if limit_val == 0 {
212                return Err(AnomalyGridError::invalid_configuration(
213                    "memory_limit",
214                    "0",
215                    "None (unlimited) or a positive integer",
216                ));
217            }
218        }
219        self.memory_limit = limit;
220        Ok(self)
221    }
222
223    /// Set anomaly strength weights with validation
224    pub fn with_weights(
225        mut self,
226        likelihood_weight: f64,
227        information_weight: f64,
228    ) -> AnomalyGridResult<Self> {
229        if !likelihood_weight.is_finite() || likelihood_weight < 0.0 {
230            return Err(AnomalyGridError::invalid_configuration(
231                "likelihood_weight",
232                &likelihood_weight.to_string(),
233                "a non-negative finite number",
234            ));
235        }
236
237        if !information_weight.is_finite() || information_weight < 0.0 {
238            return Err(AnomalyGridError::invalid_configuration(
239                "information_weight",
240                &information_weight.to_string(),
241                "a non-negative finite number",
242            ));
243        }
244
245        let weight_sum = likelihood_weight + information_weight;
246        if (weight_sum - 1.0).abs() > 1e-10 {
247            return Err(AnomalyGridError::invalid_configuration(
248                "weight_sum",
249                &weight_sum.to_string(),
250                "likelihood_weight + information_weight = 1.0",
251            ));
252        }
253
254        self.likelihood_weight = likelihood_weight;
255        self.information_weight = information_weight;
256        Ok(self)
257    }
258
259    /// Get estimated memory usage for given alphabet size
260    pub fn estimate_memory_usage(&self, alphabet_size: usize) -> usize {
261        let mut total_contexts = 0;
262        for order in 1..=self.max_order {
263            total_contexts += alphabet_size.pow(order as u32);
264        }
265
266        // Apply memory limit if set
267        if let Some(limit) = self.memory_limit {
268            total_contexts.min(limit)
269        } else {
270            total_contexts
271        }
272    }
273
274    /// Check if configuration is suitable for given alphabet size
275    pub fn is_suitable_for_alphabet(&self, alphabet_size: usize) -> bool {
276        // Calculate actual memory needed (without limit capping)
277        let mut actual_contexts = 0;
278        for order in 1..=self.max_order {
279            actual_contexts += alphabet_size.pow(order as u32);
280        }
281
282        // Consider suitable if actual contexts fit within limits
283        match self.memory_limit {
284            Some(limit) => actual_contexts <= limit,
285            None => actual_contexts <= 10_000_000, // Reasonable default
286        }
287    }
288}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration validates and exposes the expected values.
    #[test]
    fn test_default_config() {
        let defaults = AnomalyGridConfig::default();

        assert!(defaults.validate().is_ok());
        assert_eq!(defaults.max_order, 3);
        assert_eq!(defaults.smoothing_alpha, 1.0);
        assert_eq!(defaults.memory_limit, Some(1_000_000));
    }

    /// Every preset constructor yields a configuration that validates.
    #[test]
    fn test_preset_configs() {
        let presets = [
            AnomalyGridConfig::for_small_alphabet(),
            AnomalyGridConfig::for_large_alphabet(),
            AnomalyGridConfig::for_low_memory(),
            AnomalyGridConfig::for_high_accuracy(),
        ];

        for preset in &presets {
            assert!(preset.validate().is_ok());
        }
    }

    /// A zero order is rejected by the builder.
    #[test]
    fn test_invalid_max_order() {
        assert!(AnomalyGridConfig::default().with_max_order(0).is_err());
    }

    /// Negative and NaN alphas are rejected by the builder.
    #[test]
    fn test_invalid_smoothing_alpha() {
        for alpha in [-1.0, f64::NAN] {
            let outcome = AnomalyGridConfig::default().with_smoothing_alpha(alpha);
            assert!(outcome.is_err());
        }
    }

    /// Weights must sum to one and must not be negative.
    #[test]
    fn test_invalid_weights() {
        // 0.5 + 0.6 exceeds 1.0.
        let over_sum = AnomalyGridConfig::default().with_weights(0.5, 0.6);
        assert!(over_sum.is_err());

        // Sums to 1.0 but the first weight is negative.
        let negative = AnomalyGridConfig::default().with_weights(-0.1, 1.1);
        assert!(negative.is_err());
    }

    /// The estimate is the sum of powers, capped by the memory limit.
    #[test]
    fn test_memory_estimation() {
        // Alphabet of 2 at order 3: 2 + 4 + 8 = 14 contexts.
        let uncapped = AnomalyGridConfig::default();
        assert_eq!(uncapped.estimate_memory_usage(2), 14);

        // The same count is clamped once a limit of 10 is configured.
        let capped = AnomalyGridConfig::default()
            .with_memory_limit(Some(10))
            .unwrap();
        assert_eq!(capped.estimate_memory_usage(2), 10);
    }

    /// Presets accept alphabets that fit their limits and reject ones that do not.
    #[test]
    fn test_alphabet_suitability() {
        let small = AnomalyGridConfig::for_small_alphabet();
        for size in [5, 10] {
            assert!(small.is_suitable_for_alphabet(size));
        }

        let large = AnomalyGridConfig::for_large_alphabet();
        assert!(large.is_suitable_for_alphabet(50));

        let low_memory = AnomalyGridConfig::for_low_memory();
        assert!(low_memory.is_suitable_for_alphabet(3));

        // Low-memory preset: max_order = 2, memory_limit = 10_000.
        // 100 states need 100 + 10_000 = 10_100 contexts, which is over the
        // limit: the estimate is clamped and the alphabet is rejected.
        assert_eq!(low_memory.estimate_memory_usage(100), 10_000);
        assert!(!low_memory.is_suitable_for_alphabet(100));
    }

    /// `validate` accepts the defaults and rejects individually broken fields.
    #[test]
    fn test_config_validation() {
        // Baseline: the defaults pass.
        assert!(AnomalyGridConfig::default().validate().is_ok());

        // Zero order.
        let zero_order = AnomalyGridConfig {
            max_order: 0,
            ..AnomalyGridConfig::default()
        };
        assert!(zero_order.validate().is_err());

        // Negative alpha.
        let negative_alpha = AnomalyGridConfig {
            smoothing_alpha: -1.0,
            ..AnomalyGridConfig::default()
        };
        assert!(negative_alpha.validate().is_err());

        // Weights that sum to more than 1.0.
        let heavy_weights = AnomalyGridConfig {
            likelihood_weight: 0.8,
            information_weight: 0.3,
            ..AnomalyGridConfig::default()
        };
        assert!(heavy_weights.validate().is_err());
    }
}