anomaly_grid/
lib.rs

1use nalgebra::{Complex, DMatrix, DVector, RealField};
2use ndarray::{Array1, Array2, Array3};
3//use rand_distr::Normal; found a better approach
4use rayon::prelude::*;
5use std::collections::HashMap;
6
7// Advanced type definitions
8pub type StateSpace = Vec<String>;
9pub type ProbabilityTensor = Array3<f64>;
10pub type QuantumState = Array1<Complex<f64>>;
11
12#[derive(Debug, Clone)]
13pub struct AdvancedTransitionModel {
14    // Higher-order Markov model with variable context length
15    pub contexts: HashMap<Vec<String>, ContextNode>,
16    pub max_order: usize,
17    pub quantum_representation: Option<QuantumState>,
18    pub spectral_decomposition: Option<SpectralAnalysis>,
19}
20
21#[derive(Debug, Clone)]
22pub struct ContextNode {
23    pub counts: HashMap<String, usize>,
24    pub probabilities: HashMap<String, f64>,
25    pub information_content: f64,
26    pub entropy: f64,
27    pub kl_divergence: f64,
28}
29
30#[derive(Debug, Clone)]
31pub struct SpectralAnalysis {
32    pub eigenvalues: DVector<Complex<f64>>,
33    pub eigenvectors: DMatrix<Complex<f64>>,
34    pub stationary_distribution: DVector<f64>,
35    pub mixing_time: f64,
36    pub spectral_gap: f64,
37}
38
39#[derive(Debug, Clone)]
40pub struct AnomalyScore {
41    pub state_sequence: Vec<String>,
42    pub likelihood: f64,
43    pub information_theoretic_score: f64,
44    pub spectral_anomaly_score: f64,
45    pub quantum_coherence_measure: f64,
46    pub topological_signature: Vec<f64>,
47    pub confidence_interval: (f64, f64),
48}
49
50impl AdvancedTransitionModel {
51    pub fn new(max_order: usize) -> Self {
52        Self {
53            contexts: HashMap::new(),
54            max_order,
55            quantum_representation: None,
56            spectral_decomposition: None,
57        }
58    }
59
60    /// Build variable-order Markov model using Context Tree Weighting
61    pub fn build_context_tree(&mut self, sequence: &[String]) -> Result<(), String> {
62        // Implementation of Context Tree Weighting algorithm
63        for window_size in 1..=self.max_order {
64            for window in sequence.windows(window_size + 1) {
65                let context = window[..window_size].to_vec();
66                let next_state = &window[window_size];
67
68                let node = self
69                    .contexts
70                    .entry(context.clone())
71                    .or_insert_with(|| ContextNode::new());
72
73                *node.counts.entry(next_state.clone()).or_insert(0) += 1;
74            }
75        }
76
77        // Calculate probabilities and information measures
78        self.calculate_information_measures()?;
79        self.perform_spectral_analysis()?;
80        self.generate_quantum_representation()?;
81
82        Ok(())
83    }
84
85    /// Calculate advanced information-theoretic measures
86    fn calculate_information_measures(&mut self) -> Result<(), String> {
87        for (_context, node) in &mut self.contexts {
88            let total_count: usize = node.counts.values().sum();
89
90            // Calculate probabilities
91            for (state, &count) in &node.counts {
92                let prob = count as f64 / total_count as f64;
93                node.probabilities.insert(state.clone(), prob);
94            }
95
96            // Shannon entropy
97            node.entropy = node
98                .probabilities
99                .values()
100                .map(|&p| if p > 0.0 { -p * p.log2() } else { 0.0 })
101                .sum();
102
103            // Information content (surprise)
104            node.information_content = -node.entropy.log2();
105
106            // KL divergence from uniform distribution
107            let uniform_prob = 1.0 / node.probabilities.len() as f64;
108            node.kl_divergence = node
109                .probabilities
110                .values()
111                .map(|&p| {
112                    if p > 0.0 {
113                        p * (p / uniform_prob).log2()
114                    } else {
115                        0.0
116                    }
117                })
118                .sum();
119        }
120        Ok(())
121    }
122
123    /// Perform spectral analysis of the transition matrix
124    fn perform_spectral_analysis(&mut self) -> Result<(), String> {
125        // Early return if no contexts exist
126        if self.contexts.is_empty() {
127            return Ok(());
128        }
129
130        let matrix = match self.build_dense_transition_matrix() {
131            Ok(m) => m,
132            Err(_) => return Ok(()), // Don't fail the entire process
133        };
134
135        // Skip if matrix is empty
136        if matrix.nrows() == 0 {
137            return Ok(());
138        }
139
140        let eigenvalues = matrix.complex_eigenvalues();
141
142        // Use robust stationary distribution finder
143        let stationary_dist = self.find_stationary_distribution_robust(&matrix);
144
145        let mut sorted_eigenvals: Vec<f64> = eigenvalues.iter().map(|c| c.norm()).collect();
146        sorted_eigenvals.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
147
148        let spectral_gap = if sorted_eigenvals.len() > 1 {
149            (sorted_eigenvals[0] - sorted_eigenvals[1]).max(0.0)
150        } else {
151            0.0
152        };
153
154        let mixing_time = if spectral_gap > 1e-10 {
155            (1.0 / spectral_gap).ln().min(1000.0) // Cap at reasonable value
156        } else {
157            f64::INFINITY
158        };
159
160        self.spectral_decomposition = Some(SpectralAnalysis {
161            eigenvalues,
162            eigenvectors: DMatrix::zeros(0, 0),
163            stationary_distribution: stationary_dist,
164            mixing_time,
165            spectral_gap,
166        });
167
168        Ok(())
169    }
170
171    /// Fixed stationary distribution finder - no more infinite loops
172    fn find_stationary_distribution_robust(&self, matrix: &DMatrix<f64>) -> DVector<f64> {
173        let n = matrix.nrows();
174        if n == 0 {
175            return DVector::from_vec(vec![]);
176        }
177
178        // Try power iteration with limits
179        if let Some(dist) = self.power_iteration_with_limits(matrix) {
180            return dist;
181        }
182
183        // Fallback to uniform distribution
184        DVector::from_element(n, 1.0 / n as f64)
185    }
186    /// Power iteration with proper convergence checks
187    fn power_iteration_with_limits(&self, matrix: &DMatrix<f64>) -> Option<DVector<f64>> {
188        let n = matrix.nrows();
189        let mut dist = DVector::from_element(n, 1.0 / n as f64);
190
191        for _ in 0..100 {
192            // Hard limit: max 100 iterations
193            let new_dist = matrix.transpose() * &dist;
194            let norm = new_dist.norm();
195
196            if norm < 1e-15 {
197                // Vector became zero
198                return None;
199            }
200
201            let new_dist_normalized = new_dist / norm;
202
203            // Check convergence
204            if (&new_dist_normalized - &dist).norm() < 1e-9 {
205                return Some(new_dist_normalized);
206            }
207
208            dist = new_dist_normalized;
209        }
210
211        None // Didn't converge in time
212    }
213
214    /// Generate quantum representation of the Markov model
215    fn generate_quantum_representation(&mut self) -> Result<(), String> {
216        let states = self.get_unique_states();
217        let n_states = states.len();
218
219        // Create quantum superposition state
220        let mut quantum_state = Array1::zeros(n_states);
221
222        // Initialize with equal superposition
223        let amplitude = 1.0 / (n_states as f64).sqrt();
224        for i in 0..n_states {
225            quantum_state[i] = Complex::new(amplitude, 0.0);
226        }
227
228        // Apply quantum phase based on transition probabilities
229        for (i, state) in states.iter().enumerate() {
230            if let Some(context_node) = self.contexts.get(&vec![state.clone()]) {
231                let entropy_phase = context_node.entropy * std::f64::consts::PI;
232                quantum_state[i] = Complex::new(
233                    amplitude * entropy_phase.cos(),
234                    amplitude * entropy_phase.sin(),
235                );
236            }
237        }
238
239        self.quantum_representation = Some(quantum_state);
240        Ok(())
241    }
242
243    /// Advanced anomaly detection using multiple mathematical approaches
244    pub fn detect_advanced_anomalies(
245        &self,
246        sequence: &[String],
247        _threshold: f64,
248    ) -> Vec<AnomalyScore> {
249        sequence
250            .windows(self.max_order + 1)
251            .par_bridge()
252            // Compute a full anomaly score for each window;
253            // only windows that successfully produce a score are kept.
254            .filter_map(|window| self.calculate_comprehensive_anomaly_score(window))
255            .collect()
256    }
257
258    /// Calculate comprehensive anomaly score using multiple mathematical measures
259    fn calculate_comprehensive_anomaly_score(&self, sequence: &[String]) -> Option<AnomalyScore> {
260        // Likelihood-based score
261        let likelihood = self.calculate_sequence_likelihood(sequence)?;
262
263        // Information-theoretic score
264        let info_score = self.calculate_information_score(sequence)?;
265
266        // Spectral anomaly score
267        let spectral_score = self.calculate_spectral_anomaly_score(sequence)?;
268
269        // Quantum coherence measure
270        let quantum_score = self.calculate_quantum_coherence(sequence)?;
271
272        // Topological signature
273        let topo_signature = self.calculate_topological_signature(sequence)?;
274
275        // Bayesian confidence interval
276        let confidence_interval = self.calculate_bayesian_confidence_interval(sequence)?;
277
278        Some(AnomalyScore {
279            state_sequence: sequence.to_vec(),
280            likelihood,
281            information_theoretic_score: info_score,
282            spectral_anomaly_score: spectral_score,
283            quantum_coherence_measure: quantum_score,
284            topological_signature: topo_signature,
285            confidence_interval,
286        })
287    }
288
289    /// Calculate sequence likelihood using variable-order context
290    fn calculate_sequence_likelihood(&self, sequence: &[String]) -> Option<f64> {
291        let mut likelihood = 1.0;
292
293        for i in 1..sequence.len() {
294            let max_context_len = std::cmp::min(i, self.max_order);
295            let mut best_prob = 0.0;
296
297            for context_len in 1..=max_context_len {
298                let context = sequence[i - context_len..i].to_vec();
299                if let Some(node) = self.contexts.get(&context) {
300                    if let Some(&prob) = node.probabilities.get(&sequence[i]) {
301                        // FIX: This corrected logic penalizes contexts with low counts,
302                        // correctly identifying rare transitions as low-likelihood events.
303                        let total_support: usize = node.counts.values().sum();
304                        if total_support > 0 {
305                            let weighted_prob = prob / (total_support as f64).sqrt();
306                            best_prob = best_prob.max(weighted_prob);
307                        }
308                    }
309                }
310            }
311
312            if best_prob > 0.0 {
313                likelihood *= best_prob;
314            } else {
315                // Laplace smoothing for unseen transitions
316                likelihood *= 1e-10;
317            }
318        }
319
320        Some(likelihood)
321    }
322
323    /// Calculate information-theoretic anomaly score
324    fn calculate_information_score(&self, sequence: &[String]) -> Option<f64> {
325        let mut total_surprise = 0.0;
326        let mut context_entropy = 0.0;
327
328        for i in 1..sequence.len() {
329            let context_len = std::cmp::min(i, self.max_order);
330            let context = sequence[i - context_len..i].to_vec();
331
332            if let Some(node) = self.contexts.get(&context) {
333                // Add entropy of the context
334                context_entropy += node.entropy;
335
336                // Add surprise (information content) of the transition
337                if let Some(&prob) = node.probabilities.get(&sequence[i]) {
338                    total_surprise += -prob.log2();
339                }
340            }
341        }
342
343        // Normalize by sequence length
344        Some((total_surprise + context_entropy) / sequence.len() as f64)
345    }
346
347    /// Calculate spectral anomaly score based on eigenvalue analysis
348    fn calculate_spectral_anomaly_score(&self, sequence: &[String]) -> Option<f64> {
349        let spectral = self.spectral_decomposition.as_ref()?;
350
351        // Calculate deviation from expected stationary behavior
352        let mut deviation = 0.0;
353        let states = self.get_unique_states();
354
355        for (_, state) in sequence.iter().enumerate() {
356            if let Some(state_idx) = states.iter().position(|s| s == state) {
357                let expected_prob = spectral.stationary_distribution[state_idx];
358                let observed_freq = self.calculate_observed_frequency(state, sequence);
359                deviation += (observed_freq - expected_prob).abs();
360            }
361        }
362
363        Some(deviation / sequence.len() as f64)
364    }
365
366    /// Calculate quantum coherence measure
367    fn calculate_quantum_coherence(&self, _sequence: &[String]) -> Option<f64> {
368        let quantum_state = self.quantum_representation.as_ref()?;
369        let states = self.get_unique_states();
370
371        // Calculate von Neumann entropy of the quantum state
372        let mut density_matrix = Array2::zeros((states.len(), states.len()));
373
374        for i in 0..states.len() {
375            for j in 0..states.len() {
376                density_matrix[[i, j]] = (quantum_state[i].conj() * quantum_state[j]).re;
377            }
378        }
379
380        // Simplified coherence measure (should use proper eigenvalue decomposition)
381        let trace = (0..states.len())
382            .map(|i| density_matrix[[i, i]])
383            .sum::<f64>();
384        let coherence = 1.0 - trace / states.len() as f64;
385
386        Some(coherence)
387    }
388
389    /// Calculate topological signature using persistent homology concepts
390    fn calculate_topological_signature(&self, sequence: &[String]) -> Option<Vec<f64>> {
391        // Simplified topological analysis
392        // In full implementation, would use persistent homology libraries
393
394        let mut signature = Vec::new();
395
396        // Calculate different topological invariants
397        // 1. Connected components (Betti-0)
398        let components = self.count_connected_components(sequence);
399        signature.push(components as f64);
400
401        // 2. Cycles (simplified Betti-1)
402        let cycles = self.count_approximate_cycles(sequence);
403        signature.push(cycles as f64);
404
405        // 3. Local clustering coefficient
406        let clustering = self.calculate_local_clustering(sequence);
407        signature.push(clustering);
408
409        Some(signature)
410    }
411
412    /// Calculate Bayesian confidence interval for anomaly score
413    fn calculate_bayesian_confidence_interval(&self, sequence: &[String]) -> Option<(f64, f64)> {
414        // Simplified Bayesian interval calculation
415        // In full implementation, would use MCMC sampling
416
417        let likelihood = self.calculate_sequence_likelihood(sequence)?;
418        let log_likelihood = likelihood.ln();
419
420        // Use normal approximation for confidence interval
421        let std_error = (1.0 / sequence.len() as f64).sqrt();
422        let margin = 1.96 * std_error; // 95% confidence interval
423
424        Some((
425            (log_likelihood - margin).exp(),
426            (log_likelihood + margin).exp(),
427        ))
428    }
429
430    // Helper methods
431    fn build_dense_transition_matrix(&self) -> Result<DMatrix<f64>, String> {
432        let states = self.get_unique_states();
433        let n = states.len();
434        let mut matrix = DMatrix::zeros(n, n);
435
436        for (i, from_state) in states.iter().enumerate() {
437            if let Some(node) = self.contexts.get(&vec![from_state.clone()]) {
438                for (j, to_state) in states.iter().enumerate() {
439                    if let Some(&prob) = node.probabilities.get(to_state) {
440                        matrix[(i, j)] = prob;
441                    }
442                }
443            }
444        }
445
446        Ok(matrix)
447    }
448
449    /// Original method replacement - add the robust version
450    //fn find_stationary_distribution(&self, matrix: &DMatrix<f64>) -> Result<DVector<f64>, String> {
451    //    Ok(self.find_stationary_distribution_robust(matrix))
452    //}
453
454    fn get_unique_states(&self) -> Vec<String> {
455        let mut states = std::collections::HashSet::new();
456        for context in self.contexts.keys() {
457            for state in context {
458                states.insert(state.clone());
459            }
460        }
461        for node in self.contexts.values() {
462            for state in node.counts.keys() {
463                states.insert(state.clone());
464            }
465        }
466        states.into_iter().collect()
467    }
468
469    fn calculate_observed_frequency(&self, target_state: &str, sequence: &[String]) -> f64 {
470        let count = sequence.iter().filter(|&s| s == target_state).count();
471        count as f64 / sequence.len() as f64
472    }
473
474    fn count_connected_components(&self, _sequence: &[String]) -> usize {
475        // Simplified implementation
476        self.contexts.len()
477    }
478
479    fn count_approximate_cycles(&self, sequence: &[String]) -> usize {
480        // Count approximate cycles by looking for repeated subsequences
481        let mut cycles = 0;
482        for len in 2..=sequence.len() / 2 {
483            for i in 0..=sequence.len() - 2 * len {
484                if sequence[i..i + len] == sequence[i + len..i + 2 * len] {
485                    cycles += 1;
486                }
487            }
488        }
489        cycles
490    }
491
492    fn calculate_local_clustering(&self, _sequence: &[String]) -> f64 {
493        // Simplified clustering coefficient
494        let mut clustering_sum = 0.0;
495        let mut node_count = 0;
496
497        for (_context, node) in &self.contexts {
498            if node.counts.len() > 1 {
499                let connections = node.counts.len();
500                let possible_connections = connections * (connections - 1) / 2;
501                if possible_connections > 0 {
502                    clustering_sum += connections as f64 / possible_connections as f64;
503                    node_count += 1;
504                }
505            }
506        }
507
508        if node_count > 0 {
509            clustering_sum / node_count as f64
510        } else {
511            0.0
512        }
513    }
514}
515
516impl ContextNode {
517    fn new() -> Self {
518        Self {
519            counts: HashMap::new(),
520            probabilities: HashMap::new(),
521            information_content: 0.0,
522            entropy: 0.0,
523            kl_divergence: 0.0,
524        }
525    }
526}
527
528// Performance-optimized parallel processing
529pub fn batch_process_sequences(
530    sequences: &[Vec<String>],
531    max_order: usize,
532    threshold: f64,
533) -> Vec<Vec<AnomalyScore>> {
534    sequences
535        .par_iter()
536        .map(|sequence| {
537            let mut model = AdvancedTransitionModel::new(max_order);
538            model.build_context_tree(sequence).unwrap_or_else(|e| {
539                eprintln!("Error building context tree: {}", e);
540            });
541            model.detect_advanced_anomalies(sequence, threshold)
542        })
543        .collect()
544}
545
546#[cfg(test)]
547mod tests {
548    use super::*;
549
550    #[test]
551    fn test_advanced_anomaly_detection() {
552        let sequence: Vec<String> = vec!["A", "B", "C", "A", "B", "C", "A", "D", "E", "F", "A"]
553            .into_iter()
554            .map(String::from)
555            .collect();
556
557        let mut model = AdvancedTransitionModel::new(3);
558        model.build_context_tree(&sequence).unwrap();
559
560        let anomalies = model.detect_advanced_anomalies(&sequence, 0.1);
561
562        assert!(!anomalies.is_empty());
563
564        for anomaly in &anomalies {
565            println!("Advanced Anomaly Detected:");
566            println!("  Sequence: {:?}", anomaly.state_sequence);
567            println!("  Likelihood: {:.6}", anomaly.likelihood);
568            println!(
569                "  Information Score: {:.6}",
570                anomaly.information_theoretic_score
571            );
572            println!("  Spectral Score: {:.6}", anomaly.spectral_anomaly_score);
573            println!(
574                "  Quantum Coherence: {:.6}",
575                anomaly.quantum_coherence_measure
576            );
577            println!("  Confidence Interval: {:?}", anomaly.confidence_interval);
578            println!(
579                "  Topological Signature: {:?}",
580                anomaly.topological_signature
581            );
582            println!();
583        }
584    }
585
586    #[test]
587    fn test_spectral_analysis() {
588        let sequence: Vec<String> = vec!["A", "B", "A", "B", "A", "C", "A", "C"]
589            .into_iter()
590            .map(String::from)
591            .collect();
592
593        let mut model = AdvancedTransitionModel::new(2);
594        model.build_context_tree(&sequence).unwrap();
595
596        assert!(model.spectral_decomposition.is_some());
597
598        let spectral = model.spectral_decomposition.as_ref().unwrap();
599        println!("Spectral Analysis:");
600        println!("  Mixing time: {:.4}", spectral.mixing_time);
601        println!("  Spectral gap: {:.4}", spectral.spectral_gap);
602        println!(
603            "  Stationary distribution: {:?}",
604            spectral.stationary_distribution
605        );
606    }
607}
608pub mod func_tests;
609pub mod test_runner;