anomaly_grid/
lib.rs
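
//! Advanced Markov-chain anomaly detection over discrete state sequences,
//! combining likelihood, information-theoretic, spectral, quantum-inspired,
//! and topological measures.
//!
//! # Example
//!
//! A minimal usage sketch, assuming the crate is named `anomaly_grid`:
//!
//! ```
//! use anomaly_grid::AdvancedTransitionModel;
//!
//! let sequence: Vec<String> = ["A", "B", "C", "A", "B", "C", "A", "X"]
//!     .iter()
//!     .map(|s| s.to_string())
//!     .collect();
//!
//! let mut model = AdvancedTransitionModel::new(2);
//! model.build_context_tree(&sequence).expect("failed to build context tree");
//!
//! // Each window of `max_order + 1` states receives a composite AnomalyScore.
//! let anomalies = model.detect_advanced_anomalies(&sequence, 0.1);
//! assert!(!anomalies.is_empty());
//! ```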

use nalgebra::{Complex, DMatrix, DVector};
use ndarray::{Array1, Array2, Array3};
// Note: rand_distr::Normal is no longer needed; a better approach is used below.
use rayon::prelude::*;
use std::collections::HashMap;

// Advanced type definitions
pub type StateSpace = Vec<String>;
pub type ProbabilityTensor = Array3<f64>;
pub type QuantumState = Array1<Complex<f64>>;

#[derive(Debug, Clone)]
pub struct AdvancedTransitionModel {
    // Higher-order Markov model with variable context length
    pub contexts: HashMap<Vec<String>, ContextNode>,
    pub max_order: usize,
    pub quantum_representation: Option<QuantumState>,
    pub spectral_decomposition: Option<SpectralAnalysis>,
}

#[derive(Debug, Clone)]
pub struct ContextNode {
    pub counts: HashMap<String, usize>,
    pub probabilities: HashMap<String, f64>,
    pub information_content: f64,
    pub entropy: f64,
    pub kl_divergence: f64,
}

#[derive(Debug, Clone)]
pub struct SpectralAnalysis {
    pub eigenvalues: DVector<Complex<f64>>,
    pub eigenvectors: DMatrix<Complex<f64>>,
    pub stationary_distribution: DVector<f64>,
    pub mixing_time: f64,
    pub spectral_gap: f64,
}

#[derive(Debug, Clone)]
pub struct AnomalyScore {
    pub state_sequence: Vec<String>,
    pub likelihood: f64,
    pub information_theoretic_score: f64,
    pub spectral_anomaly_score: f64,
    pub quantum_coherence_measure: f64,
    pub topological_signature: Vec<f64>,
    pub confidence_interval: (f64, f64),
}

impl AdvancedTransitionModel {
    pub fn new(max_order: usize) -> Self {
        Self {
            contexts: HashMap::new(),
            max_order,
            quantum_representation: None,
            spectral_decomposition: None,
        }
    }

    /// Build a variable-order Markov model by counting transitions for every
    /// context of length 1..=`max_order` (a simplified context-tree construction).
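    ///
    /// # Example
    ///
    /// A minimal usage sketch, assuming the crate is named `anomaly_grid`:
    ///
    /// ```
    /// use anomaly_grid::AdvancedTransitionModel;
    ///
    /// let events: Vec<String> = ["login", "read", "write", "login", "read"]
    ///     .iter()
    ///     .map(|s| s.to_string())
    ///     .collect();
    ///
    /// let mut model = AdvancedTransitionModel::new(2);
    /// model.build_context_tree(&events).unwrap();
    ///
    /// // Every context of length 1..=max_order observed in `events` has a node.
    /// assert!(model.contexts.contains_key(&vec!["login".to_string()]));
    /// ```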
    pub fn build_context_tree(&mut self, sequence: &[String]) -> Result<(), String> {
        // Count (context, next-state) pairs for every context length up to max_order
        for window_size in 1..=self.max_order {
            for window in sequence.windows(window_size + 1) {
                let context = window[..window_size].to_vec();
                let next_state = &window[window_size];

                let node = self
                    .contexts
                    .entry(context)
                    .or_insert_with(ContextNode::new);

                *node.counts.entry(next_state.clone()).or_insert(0) += 1;
            }
        }

        // Calculate probabilities and information measures
        self.calculate_information_measures()?;
        self.perform_spectral_analysis()?;
        self.generate_quantum_representation()?;

        Ok(())
    }

    /// Calculate information-theoretic measures (entropy, surprise, KL divergence) per context
    fn calculate_information_measures(&mut self) -> Result<(), String> {
        for node in self.contexts.values_mut() {
            let total_count: usize = node.counts.values().sum();

            // Maximum-likelihood transition probabilities
            for (state, &count) in &node.counts {
                let prob = count as f64 / total_count as f64;
                node.probabilities.insert(state.clone(), prob);
            }

            // Shannon entropy: H = -sum_i p_i * log2(p_i)
            node.entropy = node
                .probabilities
                .values()
                .map(|&p| if p > 0.0 { -p * p.log2() } else { 0.0 })
                .sum();

            // Information content (surprise) of the context; guard against log2(0)
            node.information_content = if node.entropy > 0.0 {
                -node.entropy.log2()
            } else {
                0.0
            };

            // KL divergence from the uniform distribution: sum_i p_i * log2(p_i / u)
            let uniform_prob = 1.0 / node.probabilities.len() as f64;
            node.kl_divergence = node
                .probabilities
                .values()
                .map(|&p| {
                    if p > 0.0 {
                        p * (p / uniform_prob).log2()
                    } else {
                        0.0
                    }
                })
                .sum();
        }
        Ok(())
    }

    /// Perform spectral analysis of the transition matrix
    fn perform_spectral_analysis(&mut self) -> Result<(), String> {
        // Early return if no contexts exist
        if self.contexts.is_empty() {
            return Ok(());
        }

        let matrix = match self.build_dense_transition_matrix() {
            Ok(m) => m,
            Err(_) => return Ok(()), // Don't fail the entire process
        };

        // Skip if matrix is empty
        if matrix.nrows() == 0 {
            return Ok(());
        }

        let eigenvalues = matrix.complex_eigenvalues();

        // Use robust stationary distribution finder
        let stationary_dist = self.find_stationary_distribution_robust(&matrix);

        let mut sorted_eigenvals: Vec<f64> = eigenvalues.iter().map(|c| c.norm()).collect();
        sorted_eigenvals.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));

        let spectral_gap = if sorted_eigenvals.len() > 1 {
            (sorted_eigenvals[0] - sorted_eigenvals[1]).max(0.0)
        } else {
            0.0
        };

        let mixing_time = if spectral_gap > 1e-10 {
            (1.0 / spectral_gap).ln().min(1000.0) // Cap at a reasonable value
        } else {
            f64::INFINITY
        };

        self.spectral_decomposition = Some(SpectralAnalysis {
            eigenvalues,
            // Eigenvectors are not currently computed; stored as an empty placeholder
            eigenvectors: DMatrix::zeros(0, 0),
            stationary_distribution: stationary_dist,
            mixing_time,
            spectral_gap,
        });

        Ok(())
    }

    /// Robust stationary-distribution finder with bounded iteration
    fn find_stationary_distribution_robust(&self, matrix: &DMatrix<f64>) -> DVector<f64> {
        let n = matrix.nrows();
        if n == 0 {
            return DVector::from_vec(vec![]);
        }

        // Try power iteration with a bounded number of steps
        if let Some(dist) = self.power_iteration_with_limits(matrix) {
            // Renormalize so the entries form a probability distribution (sum to 1)
            let total = dist.sum();
            if total > 1e-15 {
                return dist / total;
            }
        }

        // Fallback to the uniform distribution
        DVector::from_element(n, 1.0 / n as f64)
    }

    /// Power iteration with proper convergence checks
    fn power_iteration_with_limits(&self, matrix: &DMatrix<f64>) -> Option<DVector<f64>> {
        let n = matrix.nrows();
        let mut dist = DVector::from_element(n, 1.0 / n as f64);

        for _ in 0..100 {
            // Hard limit: max 100 iterations
            let new_dist = matrix.transpose() * &dist;
            let norm = new_dist.norm();

            if norm < 1e-15 {
                // Vector became zero
                return None;
            }

            let new_dist_normalized = new_dist / norm;

            // Check convergence
            if (&new_dist_normalized - &dist).norm() < 1e-9 {
                return Some(new_dist_normalized);
            }

            dist = new_dist_normalized;
        }

        None // Did not converge within the iteration limit
    }

    /// Generate quantum representation of the Markov model
    fn generate_quantum_representation(&mut self) -> Result<(), String> {
        let states = self.get_unique_states();
        let n_states = states.len();

        // Create quantum superposition state
        let mut quantum_state = Array1::zeros(n_states);

        // Initialize with equal superposition
        let amplitude = 1.0 / (n_states as f64).sqrt();
        for i in 0..n_states {
            quantum_state[i] = Complex::new(amplitude, 0.0);
        }

        // Apply a phase derived from each single-state context's entropy
        for (i, state) in states.iter().enumerate() {
            if let Some(context_node) = self.contexts.get(&vec![state.clone()]) {
                let entropy_phase = context_node.entropy * std::f64::consts::PI;
                quantum_state[i] = Complex::new(
                    amplitude * entropy_phase.cos(),
                    amplitude * entropy_phase.sin(),
                );
            }
        }

        self.quantum_representation = Some(quantum_state);
        Ok(())
    }

    /// Advanced anomaly detection using multiple mathematical approaches
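    ///
    /// Each window of `max_order + 1` consecutive states is scored; the
    /// `threshold` parameter is currently unused.
    ///
    /// # Example
    ///
    /// A minimal sketch, assuming the crate is named `anomaly_grid`:
    ///
    /// ```
    /// use anomaly_grid::AdvancedTransitionModel;
    ///
    /// let sequence: Vec<String> = ["A", "B", "A", "B", "A", "C"]
    ///     .iter()
    ///     .map(|s| s.to_string())
    ///     .collect();
    ///
    /// let mut model = AdvancedTransitionModel::new(2);
    /// model.build_context_tree(&sequence).unwrap();
    ///
    /// for score in model.detect_advanced_anomalies(&sequence, 0.1) {
    ///     // Likelihoods are products of transition probabilities, so they stay in (0, 1].
    ///     assert!(score.likelihood > 0.0 && score.likelihood <= 1.0);
    /// }
    /// ```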
    pub fn detect_advanced_anomalies(
        &self,
        sequence: &[String],
        _threshold: f64,
    ) -> Vec<AnomalyScore> {
        sequence
            .windows(self.max_order + 1)
            .par_bridge()
            // Compute a full anomaly score for each window;
            // only windows that successfully produce a score are kept.
            .filter_map(|window| self.calculate_comprehensive_anomaly_score(window))
            .collect()
    }

    /// Calculate comprehensive anomaly score using multiple mathematical measures
    fn calculate_comprehensive_anomaly_score(&self, sequence: &[String]) -> Option<AnomalyScore> {
        // Likelihood-based score
        let likelihood = self.calculate_sequence_likelihood(sequence)?;

        // Information-theoretic score
        let info_score = self.calculate_information_score(sequence)?;

        // Spectral anomaly score
        let spectral_score = self.calculate_spectral_anomaly_score(sequence)?;

        // Quantum coherence measure
        let quantum_score = self.calculate_quantum_coherence(sequence)?;

        // Topological signature
        let topo_signature = self.calculate_topological_signature(sequence)?;

        // Bayesian confidence interval
        let confidence_interval = self.calculate_bayesian_confidence_interval(sequence)?;

        Some(AnomalyScore {
            state_sequence: sequence.to_vec(),
            likelihood,
            information_theoretic_score: info_score,
            spectral_anomaly_score: spectral_score,
            quantum_coherence_measure: quantum_score,
            topological_signature: topo_signature,
            confidence_interval,
        })
    }

    /// Calculate sequence likelihood using variable-order context
    fn calculate_sequence_likelihood(&self, sequence: &[String]) -> Option<f64> {
        let mut likelihood = 1.0;

        for i in 1..sequence.len() {
            let max_context_len = std::cmp::min(i, self.max_order);
            let mut best_prob = 0.0;

            for context_len in 1..=max_context_len {
                let context = sequence[i - context_len..i].to_vec();
                if let Some(node) = self.contexts.get(&context) {
                    if let Some(&prob) = node.probabilities.get(&sequence[i]) {
                        // Use the raw probability: it is already the maximum likelihood estimate
                        best_prob = best_prob.max(prob);
                    }
                }
            }

            if best_prob > 0.0 {
                likelihood *= best_prob;
            } else {
                // Floor probability for unseen transitions (simple smoothing)
                likelihood *= 1e-10;
            }
        }

        Some(likelihood)
    }

    /// Calculate information-theoretic anomaly score
    fn calculate_information_score(&self, sequence: &[String]) -> Option<f64> {
        let mut total_surprise = 0.0;
        let mut context_entropy = 0.0;

        for i in 1..sequence.len() {
            let context_len = std::cmp::min(i, self.max_order);
            let context = sequence[i - context_len..i].to_vec();

            if let Some(node) = self.contexts.get(&context) {
                // Add entropy of the context
                context_entropy += node.entropy;

                // Add surprise (information content) of the transition
                if let Some(&prob) = node.probabilities.get(&sequence[i]) {
                    total_surprise += -prob.log2();
                }
            }
        }

        // Normalize by sequence length
        Some((total_surprise + context_entropy) / sequence.len() as f64)
    }

    /// Calculate spectral anomaly score based on eigenvalue analysis
    fn calculate_spectral_anomaly_score(&self, sequence: &[String]) -> Option<f64> {
        let spectral = self.spectral_decomposition.as_ref()?;

        // Calculate deviation from expected stationary behavior
        let mut deviation = 0.0;
        let states = self.get_unique_states();

        for state in sequence {
            if let Some(state_idx) = states.iter().position(|s| s == state) {
                let expected_prob = spectral.stationary_distribution[state_idx];
                let observed_freq = self.calculate_observed_frequency(state, sequence);
                deviation += (observed_freq - expected_prob).abs();
            }
        }

        Some(deviation / sequence.len() as f64)
    }

    /// Calculate quantum coherence measure
    fn calculate_quantum_coherence(&self, _sequence: &[String]) -> Option<f64> {
        let quantum_state = self.quantum_representation.as_ref()?;
        let states = self.get_unique_states();

        // Build a density-matrix representation of the quantum state (real parts only)
        let mut density_matrix = Array2::zeros((states.len(), states.len()));

        for i in 0..states.len() {
            for j in 0..states.len() {
                density_matrix[[i, j]] = (quantum_state[i].conj() * quantum_state[j]).re;
            }
        }

        // Simplified coherence measure (a full treatment would use an eigenvalue decomposition)
        let trace = (0..states.len())
            .map(|i| density_matrix[[i, i]])
            .sum::<f64>();
        let coherence = 1.0 - trace / states.len() as f64;

        Some(coherence)
    }

    /// Calculate topological signature using persistent homology concepts
    fn calculate_topological_signature(&self, sequence: &[String]) -> Option<Vec<f64>> {
        // Simplified topological analysis;
        // a full implementation would use a persistent homology library

        let mut signature = Vec::new();

        // Calculate different topological invariants
        // 1. Connected components (Betti-0)
        let components = self.count_connected_components(sequence);
        signature.push(components as f64);

        // 2. Cycles (simplified Betti-1)
        let cycles = self.count_approximate_cycles(sequence);
        signature.push(cycles as f64);

        // 3. Local clustering coefficient
        let clustering = self.calculate_local_clustering(sequence);
        signature.push(clustering);

        Some(signature)
    }

    /// Calculate a Bayesian-style confidence interval for the anomaly score
    fn calculate_bayesian_confidence_interval(&self, sequence: &[String]) -> Option<(f64, f64)> {
        // Simplified interval calculation;
        // a full implementation would use MCMC sampling

        let likelihood = self.calculate_sequence_likelihood(sequence)?;
        let log_likelihood = likelihood.ln();

        // Use a normal approximation for the interval
        let std_error = (1.0 / sequence.len() as f64).sqrt();
        let margin = 1.96 * std_error; // 95% confidence interval

        Some((
            (log_likelihood - margin).exp(),
            (log_likelihood + margin).exp(),
        ))
    }

    // Helper methods
    fn build_dense_transition_matrix(&self) -> Result<DMatrix<f64>, String> {
        let states = self.get_unique_states();
        let n = states.len();
        let mut matrix = DMatrix::zeros(n, n);

        for (i, from_state) in states.iter().enumerate() {
            if let Some(node) = self.contexts.get(&vec![from_state.clone()]) {
                for (j, to_state) in states.iter().enumerate() {
                    if let Some(&prob) = node.probabilities.get(to_state) {
                        matrix[(i, j)] = prob;
                    }
                }
            }
        }

        Ok(matrix)
    }
    fn get_unique_states(&self) -> Vec<String> {
        let mut states = std::collections::HashSet::new();
        for context in self.contexts.keys() {
            for state in context {
                states.insert(state.clone());
            }
        }
        for node in self.contexts.values() {
            for state in node.counts.keys() {
                states.insert(state.clone());
            }
        }
        // Sort for a deterministic ordering, so state indices agree across calls
        let mut states: Vec<String> = states.into_iter().collect();
        states.sort();
        states
    }

    fn calculate_observed_frequency(&self, target_state: &str, sequence: &[String]) -> f64 {
        let count = sequence.iter().filter(|&s| s == target_state).count();
        count as f64 / sequence.len() as f64
    }

    fn count_connected_components(&self, _sequence: &[String]) -> usize {
        // Simplified implementation: approximated by the number of observed contexts
        self.contexts.len()
    }

    fn count_approximate_cycles(&self, sequence: &[String]) -> usize {
        // Count approximate cycles by looking for repeated subsequences
        let mut cycles = 0;
        for len in 2..=sequence.len() / 2 {
            for i in 0..=sequence.len() - 2 * len {
                if sequence[i..i + len] == sequence[i + len..i + 2 * len] {
                    cycles += 1;
                }
            }
        }
        cycles
    }

    fn calculate_local_clustering(&self, _sequence: &[String]) -> f64 {
        // Simplified clustering coefficient
        let mut clustering_sum = 0.0;
        let mut node_count = 0;

        for node in self.contexts.values() {
            if node.counts.len() > 1 {
                let connections = node.counts.len();
                let possible_connections = connections * (connections - 1) / 2;
                if possible_connections > 0 {
                    clustering_sum += connections as f64 / possible_connections as f64;
                    node_count += 1;
                }
            }
        }

        if node_count > 0 {
            clustering_sum / node_count as f64
        } else {
            0.0
        }
    }
}

impl ContextNode {
    fn new() -> Self {
        Self {
            counts: HashMap::new(),
            probabilities: HashMap::new(),
            information_content: 0.0,
            entropy: 0.0,
            kl_divergence: 0.0,
        }
    }
}

/// Batch-process sequences in parallel, building an independent model for each one.
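///
/// # Example
///
/// A minimal sketch, assuming the crate is named `anomaly_grid`:
///
/// ```
/// use anomaly_grid::batch_process_sequences;
///
/// let sequences: Vec<Vec<String>> = vec![
///     ["A", "B", "A", "B"].iter().map(|s| s.to_string()).collect(),
///     ["X", "Y", "Z", "X"].iter().map(|s| s.to_string()).collect(),
/// ];
///
/// // One Vec<AnomalyScore> per input sequence, in the same order.
/// let results = batch_process_sequences(&sequences, 2, 0.1);
/// assert_eq!(results.len(), 2);
/// ```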
pub fn batch_process_sequences(
    sequences: &[Vec<String>],
    max_order: usize,
    threshold: f64,
) -> Vec<Vec<AnomalyScore>> {
    sequences
        .par_iter()
        .map(|sequence| {
            let mut model = AdvancedTransitionModel::new(max_order);
            model.build_context_tree(sequence).unwrap_or_else(|e| {
                eprintln!("Error building context tree: {}", e);
            });
            model.detect_advanced_anomalies(sequence, threshold)
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_advanced_anomaly_detection() {
        let sequence: Vec<String> = vec!["A", "B", "C", "A", "B", "C", "A", "D", "E", "F", "A"]
            .into_iter()
            .map(String::from)
            .collect();

        let mut model = AdvancedTransitionModel::new(3);
        model.build_context_tree(&sequence).unwrap();

        let anomalies = model.detect_advanced_anomalies(&sequence, 0.1);

        assert!(!anomalies.is_empty());

        for anomaly in &anomalies {
            println!("Advanced Anomaly Detected:");
            println!("  Sequence: {:?}", anomaly.state_sequence);
            println!("  Likelihood: {:.6}", anomaly.likelihood);
            println!(
                "  Information Score: {:.6}",
                anomaly.information_theoretic_score
            );
            println!("  Spectral Score: {:.6}", anomaly.spectral_anomaly_score);
            println!(
                "  Quantum Coherence: {:.6}",
                anomaly.quantum_coherence_measure
            );
            println!("  Confidence Interval: {:?}", anomaly.confidence_interval);
            println!(
                "  Topological Signature: {:?}",
                anomaly.topological_signature
            );
            println!();
        }
    }
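
    /// A smoke test of the parallel batch API, added as a usage sketch;
    /// it relies only on the public functions defined above.
    #[test]
    fn test_batch_processing_smoke() {
        let sequences: Vec<Vec<String>> = vec![
            vec!["A", "B", "A", "B", "A", "C"]
                .into_iter()
                .map(String::from)
                .collect(),
            vec!["X", "Y", "X", "Y", "X", "Z"]
                .into_iter()
                .map(String::from)
                .collect(),
        ];

        // One result vector per input sequence, in the same order.
        let results = batch_process_sequences(&sequences, 2, 0.1);
        assert_eq!(results.len(), sequences.len());
    }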

    #[test]
    fn test_spectral_analysis() {
        let sequence: Vec<String> = vec!["A", "B", "A", "B", "A", "C", "A", "C"]
            .into_iter()
            .map(String::from)
            .collect();

        let mut model = AdvancedTransitionModel::new(2);
        model.build_context_tree(&sequence).unwrap();

        assert!(model.spectral_decomposition.is_some());

        let spectral = model.spectral_decomposition.as_ref().unwrap();
        println!("Spectral Analysis:");
        println!("  Mixing time: {:.4}", spectral.mixing_time);
        println!("  Spectral gap: {:.4}", spectral.spectral_gap);
        println!(
            "  Stationary distribution: {:?}",
            spectral.stationary_distribution
        );
    }
}

pub mod func_tests;
pub mod test_runner;