//! anomaly_grid/lib.rs — variable-order Markov-model anomaly-detection library.

1use nalgebra::{Complex, DMatrix, DVector, RealField};
2use ndarray::{Array1, Array2, Array3};
// `rand_distr::Normal` import removed — superseded by a deterministic approach.
4use rayon::prelude::*;
5use std::collections::HashMap;
6
// Advanced type definitions
/// Collection of discrete state labels.
pub type StateSpace = Vec<String>;
/// 3-D tensor of probabilities — unused in this file; presumably
/// (context, from, to) indexed — TODO confirm against callers.
pub type ProbabilityTensor = Array3<f64>;
/// Complex amplitude vector over the state space.
pub type QuantumState = Array1<Complex<f64>>;
11
#[derive(Debug, Clone)]
pub struct AdvancedTransitionModel {
    // Higher-order Markov model with variable context length
    /// Map from a context (1..=max_order consecutive states) to the
    /// statistics of the states observed immediately after it.
    pub contexts: HashMap<Vec<String>, ContextNode>,
    /// Longest context length tracked when building the tree.
    pub max_order: usize,
    /// Optional amplitude vector derived from per-context entropies.
    pub quantum_representation: Option<QuantumState>,
    /// Optional eigen-analysis of the order-1 transition matrix.
    pub spectral_decomposition: Option<SpectralAnalysis>,
}
20
#[derive(Debug, Clone)]
pub struct ContextNode {
    /// Raw successor counts observed after this context.
    pub counts: HashMap<String, usize>,
    /// `counts` normalised into a conditional distribution
    /// (filled in by `calculate_information_measures`).
    pub probabilities: HashMap<String, f64>,
    /// Set to `-entropy.log2()` — NOTE(review): unusual definition of
    /// "information content"; confirm intended semantics.
    pub information_content: f64,
    /// Shannon entropy (bits) of the successor distribution.
    pub entropy: f64,
    /// KL divergence of the successor distribution from uniform.
    pub kl_divergence: f64,
}
29
#[derive(Debug, Clone)]
pub struct SpectralAnalysis {
    /// Complex eigenvalues of the row-normalised transition matrix.
    pub eigenvalues: DVector<Complex<f64>>,
    /// NOTE(review): as populated by `perform_spectral_analysis`, this holds
    /// the transition matrix lifted to complex — not true eigenvectors.
    pub eigenvectors: DMatrix<Complex<f64>>,
    /// Stationary-distribution estimate (power iteration, uniform fallback).
    pub stationary_distribution: DVector<f64>,
    /// Estimated steps to mix to within a 1/e distance of stationarity.
    pub mixing_time: f64,
    /// |λ1| − |λ2|; a larger gap implies faster mixing.
    pub spectral_gap: f64,
}
38
#[derive(Debug, Clone)]
pub struct AnomalyScore {
    /// The scored window of states.
    pub state_sequence: Vec<String>,
    /// Product of best per-transition probabilities (1e-10 floor for unseen).
    pub likelihood: f64,
    /// Mean surprise + context entropy per symbol.
    pub information_theoretic_score: f64,
    /// Mean absolute deviation of observed frequencies from stationary.
    pub spectral_anomaly_score: f64,
    /// Simplified coherence measure from the quantum representation.
    pub quantum_coherence_measure: f64,
    /// [components, cycles, clustering] — simplified topological invariants.
    pub topological_signature: Vec<f64>,
    /// Normal-approximation 95% interval around the likelihood.
    pub confidence_interval: (f64, f64),
}
49
impl AdvancedTransitionModel {
    /// Create an empty model that will track contexts up to `max_order`
    /// symbols long; analyses are populated lazily by `build_context_tree`.
    pub fn new(max_order: usize) -> Self {
        AdvancedTransitionModel {
            max_order,
            contexts: HashMap::new(),
            quantum_representation: None,
            spectral_decomposition: None,
        }
    }
59
60    /// Build variable-order Markov model using Context Tree Weighting
61    pub fn build_context_tree(&mut self, sequence: &[String]) -> Result<(), String> {
62        // Implementation of Context Tree Weighting algorithm
63        for window_size in 1..=self.max_order {
64            for window in sequence.windows(window_size + 1) {
65                let context = window[..window_size].to_vec();
66                let next_state = &window[window_size];
67
68                let node = self
69                    .contexts
70                    .entry(context.clone())
71                    .or_insert_with(|| ContextNode::new());
72
73                *node.counts.entry(next_state.clone()).or_insert(0) += 1;
74            }
75        }
76
77        // Calculate probabilities and information measures
78        self.calculate_information_measures()?;
79        self.perform_spectral_analysis()?;
80        self.generate_quantum_representation()?;
81
82        Ok(())
83    }
84
    /// Calculate advanced information-theoretic measures
    ///
    /// For every context node: normalises counts into probabilities, then
    /// computes Shannon entropy, "information content", and the KL
    /// divergence from a uniform distribution over the observed successors.
    fn calculate_information_measures(&mut self) -> Result<(), String> {
        for (_context, node) in &mut self.contexts {
            // Every node is created with at least one count, so this is > 0.
            let total_count: usize = node.counts.values().sum();

            // Calculate probabilities (maximum-likelihood estimates).
            for (state, &count) in &node.counts {
                let prob = count as f64 / total_count as f64;
                node.probabilities.insert(state.clone(), prob);
            }

            // Shannon entropy in bits: -Σ p·log2(p).
            node.entropy = node
                .probabilities
                .values()
                .map(|&p| if p > 0.0 { -p * p.log2() } else { 0.0 })
                .sum();

            // Information content (surprise)
            // NOTE(review): -log2(entropy) is an unusual definition — it is
            // +inf when entropy == 0 and negative when entropy > 1 bit.
            // Confirm this is intended rather than the entropy itself.
            node.information_content = -node.entropy.log2();

            // KL divergence from uniform distribution over the observed
            // successor set (not over the full state space).
            let uniform_prob = 1.0 / node.probabilities.len() as f64;
            node.kl_divergence = node
                .probabilities
                .values()
                .map(|&p| {
                    if p > 0.0 {
                        p * (p / uniform_prob).log2()
                    } else {
                        0.0
                    }
                })
                .sum();
        }
        Ok(())
    }
122
123    //helper function:
124    // Normalize matrix rows to sum to 1 (make stochastic)
125    fn normalize_matrix_rows(&self, matrix: &DMatrix<f64>) -> DMatrix<f64> {
126        let mut normalized = matrix.clone();
127        for i in 0..normalized.nrows() {
128            let row_sum: f64 = normalized.row(i).sum();
129            if row_sum > 1e-15 {
130                for j in 0..normalized.ncols() {
131                    normalized[(i, j)] /= row_sum;
132                }
133            }
134        }
135        normalized
136    }
137
138    /// Perform spectral analysis of the transition matrix
139    fn perform_spectral_analysis(&mut self) -> Result<(), String> {
140        // Early return if no contexts exist
141        if self.contexts.is_empty() {
142            return Ok(());
143        }
144
145        let matrix = match self.build_dense_transition_matrix() {
146            Ok(m) => self.normalize_matrix_rows(&m),
147            Err(_) => return Ok(()),
148        };
149
150        // Skip if matrix is empty
151        if matrix.nrows() == 0 {
152            return Ok(());
153        }
154
155        let eigenvalues = matrix.complex_eigenvalues();
156
157        // Use robust stationary distribution finder
158        let stationary_dist = self.find_stationary_distribution_robust(&matrix);
159
160        let mut sorted_eigenvals: Vec<f64> = eigenvalues.iter().map(|c| c.norm()).collect();
161        sorted_eigenvals.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
162
163        let spectral_gap = if sorted_eigenvals.len() > 1 {
164            (sorted_eigenvals[0] - sorted_eigenvals[1]).max(0.0)
165        } else {
166            0.0
167        };
168
169        let mixing_time = if sorted_eigenvals.len() > 1 {
170            let lambda_2 = sorted_eigenvals[1];
171            if lambda_2 > 1e-10 && lambda_2 < 1.0 {
172                (-0.368_f64.ln() / lambda_2.ln()).min(1000.0) // Use 1/e threshold
173            } else {
174                f64::INFINITY
175            }
176        } else {
177            f64::INFINITY
178        };
179
180        self.spectral_decomposition = Some(SpectralAnalysis {
181            eigenvalues,
182            eigenvectors: matrix.clone_owned().map(|v| Complex::new(v, 0.0)),
183            stationary_distribution: stationary_dist,
184            mixing_time,
185            spectral_gap,
186        });
187
188        Ok(())
189    }
190
191    /// Fixed stationary distribution finder - no more infinite loops
192    fn find_stationary_distribution_robust(&self, matrix: &DMatrix<f64>) -> DVector<f64> {
193        let n = matrix.nrows();
194        if n == 0 {
195            return DVector::from_vec(vec![]);
196        }
197
198        // Try power iteration with limits
199        if let Some(dist) = self.power_iteration_with_limits(matrix) {
200            return dist;
201        }
202
203        // Fallback to uniform distribution
204        DVector::from_element(n, 1.0 / n as f64)
205    }
206    /// Power iteration with proper convergence checks
207    fn power_iteration_with_limits(&self, matrix: &DMatrix<f64>) -> Option<DVector<f64>> {
208        let n = matrix.nrows();
209        let mut dist = DVector::from_element(n, 1.0 / n as f64);
210
211        for _ in 0..100 {
212            // Hard limit: max 100 iterations
213            let new_dist = matrix.transpose() * &dist;
214            let norm = new_dist.norm();
215
216            if norm < 1e-15 {
217                // Vector became zero
218                return None;
219            }
220
221            let new_dist_normalized = new_dist / norm;
222
223            // Check convergence
224            if (&new_dist_normalized - &dist).norm() < 1e-9 {
225                return Some(new_dist_normalized);
226            }
227
228            dist = new_dist_normalized;
229        }
230
231        None // Didn't converge in time
232    }
233
234    /// Generate quantum representation of the Markov model
235    fn generate_quantum_representation(&mut self) -> Result<(), String> {
236        let states = self.get_unique_states();
237        let n_states = states.len();
238
239        // Create quantum superposition state
240        let mut quantum_state = Array1::zeros(n_states);
241
242        // Initialize with equal superposition
243        let amplitude = 1.0 / (n_states as f64).sqrt();
244        for i in 0..n_states {
245            quantum_state[i] = Complex::new(amplitude, 0.0);
246        }
247
248        // Apply quantum phase based on transition probabilities
249        for (i, state) in states.iter().enumerate() {
250            if let Some(context_node) = self.contexts.get(&vec![state.clone()]) {
251                let entropy_phase = context_node.entropy * std::f64::consts::PI;
252                quantum_state[i] = Complex::new(
253                    amplitude * entropy_phase.cos(),
254                    amplitude * entropy_phase.sin(),
255                );
256            }
257        }
258
259        self.quantum_representation = Some(quantum_state);
260        Ok(())
261    }
262
263    /// Advanced anomaly detection using multiple mathematical approaches
264    pub fn detect_advanced_anomalies(
265        &self,
266        sequence: &[String],
267        _threshold: f64,
268    ) -> Vec<AnomalyScore> {
269        sequence
270            .windows(self.max_order + 1)
271            .par_bridge()
272            // Compute a full anomaly score for each window;
273            // only windows that successfully produce a score are kept.
274            .filter_map(|window| self.calculate_comprehensive_anomaly_score(window))
275            .collect()
276    }
277
278    /// Calculate comprehensive anomaly score using multiple mathematical measures
279    fn calculate_comprehensive_anomaly_score(&self, sequence: &[String]) -> Option<AnomalyScore> {
280        // Likelihood-based score
281        let likelihood = self.calculate_sequence_likelihood(sequence)?;
282
283        // Information-theoretic score
284        let info_score = self.calculate_information_score(sequence)?;
285
286        // Spectral anomaly score
287        let spectral_score = self.calculate_spectral_anomaly_score(sequence)?;
288
289        // Quantum coherence measure
290        let quantum_score = self.calculate_quantum_coherence(sequence)?;
291
292        // Topological signature
293        let topo_signature = self.calculate_topological_signature(sequence)?;
294
295        // Bayesian confidence interval
296        let confidence_interval = self.calculate_bayesian_confidence_interval(sequence)?;
297
298        Some(AnomalyScore {
299            state_sequence: sequence.to_vec(),
300            likelihood,
301            information_theoretic_score: info_score,
302            spectral_anomaly_score: spectral_score,
303            quantum_coherence_measure: quantum_score,
304            topological_signature: topo_signature,
305            confidence_interval,
306        })
307    }
308
309    /// Calculate sequence likelihood using variable-order context
310    fn calculate_sequence_likelihood(&self, sequence: &[String]) -> Option<f64> {
311        let mut likelihood = 1.0;
312
313        for i in 1..sequence.len() {
314            let max_context_len = std::cmp::min(i, self.max_order);
315            let mut best_prob = 0.0;
316
317            for context_len in 1..=max_context_len {
318                let context = sequence[i - context_len..i].to_vec();
319                if let Some(node) = self.contexts.get(&context) {
320                    if let Some(&prob) = node.probabilities.get(&sequence[i]) {
321                        //  removed mathematically invalid sqrt division
322                        // Using the raw probability - it iss already the maximum likelihood estimate
323                        best_prob = best_prob.max(prob);
324                    }
325                }
326            }
327
328            if best_prob > 0.0 {
329                likelihood *= best_prob;
330            } else {
331                // Laplace smoothing for unseen transitions
332                likelihood *= 1e-10;
333            }
334        }
335
336        Some(likelihood)
337    }
338
339    /// Calculate information-theoretic anomaly score
340    fn calculate_information_score(&self, sequence: &[String]) -> Option<f64> {
341        let mut total_surprise = 0.0;
342        let mut context_entropy = 0.0;
343
344        for i in 1..sequence.len() {
345            let context_len = std::cmp::min(i, self.max_order);
346            let context = sequence[i - context_len..i].to_vec();
347
348            if let Some(node) = self.contexts.get(&context) {
349                // Add entropy of the context
350                context_entropy += node.entropy;
351
352                // Add surprise (information content) of the transition
353                if let Some(&prob) = node.probabilities.get(&sequence[i]) {
354                    total_surprise += -prob.log2();
355                }
356            }
357        }
358
359        // Normalize by sequence length
360        Some((total_surprise + context_entropy) / sequence.len() as f64)
361    }
362
363    /// Calculate spectral anomaly score based on eigenvalue analysis
364    fn calculate_spectral_anomaly_score(&self, sequence: &[String]) -> Option<f64> {
365        let spectral = self.spectral_decomposition.as_ref()?;
366
367        // Calculate deviation from expected stationary behavior
368        let mut deviation = 0.0;
369        let states = self.get_unique_states();
370
371        for (_, state) in sequence.iter().enumerate() {
372            if let Some(state_idx) = states.iter().position(|s| s == state) {
373                let expected_prob = spectral.stationary_distribution[state_idx];
374                let observed_freq = self.calculate_observed_frequency(state, sequence);
375                deviation += (observed_freq - expected_prob).abs();
376            }
377        }
378
379        Some(deviation / sequence.len() as f64)
380    }
381
382    /// Calculate quantum coherence measure
383    fn calculate_quantum_coherence(&self, _sequence: &[String]) -> Option<f64> {
384        let quantum_state = self.quantum_representation.as_ref()?;
385        let states = self.get_unique_states();
386
387        // Calculate von Neumann entropy of the quantum state
388        let mut density_matrix = Array2::zeros((states.len(), states.len()));
389
390        for i in 0..states.len() {
391            for j in 0..states.len() {
392                density_matrix[[i, j]] = (quantum_state[i].conj() * quantum_state[j]).re;
393            }
394        }
395
396        // Simplified coherence measure (should use proper eigenvalue decomposition)
397        let trace = (0..states.len())
398            .map(|i| density_matrix[[i, i]])
399            .sum::<f64>();
400        let coherence = 1.0 - trace / states.len() as f64;
401
402        Some(coherence)
403    }
404
405    /// Calculate topological signature using persistent homology concepts
406    fn calculate_topological_signature(&self, sequence: &[String]) -> Option<Vec<f64>> {
407        // Simplified topological analysis
408        // In full implementation, would use persistent homology libraries
409
410        let mut signature = Vec::new();
411
412        // Calculate different topological invariants
413        // 1. Connected components (Betti-0)
414        let components = self.count_connected_components(sequence);
415        signature.push(components as f64);
416
417        // 2. Cycles (simplified Betti-1)
418        let cycles = self.count_approximate_cycles(sequence);
419        signature.push(cycles as f64);
420
421        // 3. Local clustering coefficient
422        let clustering = self.calculate_local_clustering(sequence);
423        signature.push(clustering);
424
425        Some(signature)
426    }
427
428    /// Calculate Bayesian confidence interval for anomaly score
429    fn calculate_bayesian_confidence_interval(&self, sequence: &[String]) -> Option<(f64, f64)> {
430        // Simplified Bayesian interval calculation
431        // In full implementation, would use MCMC sampling
432
433        let likelihood = self.calculate_sequence_likelihood(sequence)?;
434        let log_likelihood = likelihood.ln();
435
436        // Use normal approximation for confidence interval
437        let std_error = (1.0 / sequence.len() as f64).sqrt();
438        let margin = 1.96 * std_error; // 95% confidence interval
439
440        Some((
441            (log_likelihood - margin).exp(),
442            (log_likelihood + margin).exp(),
443        ))
444    }
445
446    // Helper methods
447    fn build_dense_transition_matrix(&self) -> Result<DMatrix<f64>, String> {
448        let states = self.get_unique_states();
449        let n = states.len();
450        let mut matrix = DMatrix::zeros(n, n);
451
452        for (i, from_state) in states.iter().enumerate() {
453            if let Some(node) = self.contexts.get(&vec![from_state.clone()]) {
454                for (j, to_state) in states.iter().enumerate() {
455                    if let Some(&prob) = node.probabilities.get(to_state) {
456                        matrix[(i, j)] = prob;
457                    }
458                }
459            }
460        }
461
462        Ok(matrix)
463    }
464
465    /// Original method replacement - add the robust version
466    //fn find_stationary_distribution(&self, matrix: &DMatrix<f64>) -> Result<DVector<f64>, String> {
467    //    Ok(self.find_stationary_distribution_robust(matrix))
468    //}
469
470    fn get_unique_states(&self) -> Vec<String> {
471        let mut states = std::collections::HashSet::new();
472        for context in self.contexts.keys() {
473            for state in context {
474                states.insert(state.clone());
475            }
476        }
477        for node in self.contexts.values() {
478            for state in node.counts.keys() {
479                states.insert(state.clone());
480            }
481        }
482        states.into_iter().collect()
483    }
484
485    fn calculate_observed_frequency(&self, target_state: &str, sequence: &[String]) -> f64 {
486        let count = sequence.iter().filter(|&s| s == target_state).count();
487        count as f64 / sequence.len() as f64
488    }
489
    /// Betti-0 proxy. Simplified: returns the number of distinct contexts,
    /// ignoring `_sequence` entirely — NOTE(review): this is a placeholder,
    /// not a real connected-components computation.
    fn count_connected_components(&self, _sequence: &[String]) -> usize {
        // Simplified implementation
        self.contexts.len()
    }
494
495    fn count_approximate_cycles(&self, sequence: &[String]) -> usize {
496        // Count approximate cycles by looking for repeated subsequences
497        let mut cycles = 0;
498        for len in 2..=sequence.len() / 2 {
499            for i in 0..=sequence.len() - 2 * len {
500                if sequence[i..i + len] == sequence[i + len..i + 2 * len] {
501                    cycles += 1;
502                }
503            }
504        }
505        cycles
506    }
507
508    fn calculate_local_clustering(&self, _sequence: &[String]) -> f64 {
509        // Simplified clustering coefficient
510        let mut clustering_sum = 0.0;
511        let mut node_count = 0;
512
513        for (_context, node) in &self.contexts {
514            if node.counts.len() > 1 {
515                let connections = node.counts.len();
516                let possible_connections = connections * (connections - 1) / 2;
517                if possible_connections > 0 {
518                    clustering_sum += connections as f64 / possible_connections as f64;
519                    node_count += 1;
520                }
521            }
522        }
523
524        if node_count > 0 {
525            clustering_sum / node_count as f64
526        } else {
527            0.0
528        }
529    }
530}
531
532impl ContextNode {
533    fn new() -> Self {
534        Self {
535            counts: HashMap::new(),
536            probabilities: HashMap::new(),
537            information_content: 0.0,
538            entropy: 0.0,
539            kl_divergence: 0.0,
540        }
541    }
542}
543
544// Performance-optimized parallel processing
545pub fn batch_process_sequences(
546    sequences: &[Vec<String>],
547    max_order: usize,
548    threshold: f64,
549) -> Vec<Vec<AnomalyScore>> {
550    sequences
551        .par_iter()
552        .map(|sequence| {
553            let mut model = AdvancedTransitionModel::new(max_order);
554            model.build_context_tree(sequence).unwrap_or_else(|e| {
555                eprintln!("Error building context tree: {}", e);
556            });
557            model.detect_advanced_anomalies(sequence, threshold)
558        })
559        .collect()
560}
561
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: a repeating "A B C" pattern broken by
    /// "D E F" should yield at least one scored anomaly window. Scores are
    /// printed for manual inspection (run with `--nocapture`).
    #[test]
    fn test_advanced_anomaly_detection() {
        let sequence: Vec<String> = vec!["A", "B", "C", "A", "B", "C", "A", "D", "E", "F", "A"]
            .into_iter()
            .map(String::from)
            .collect();

        let mut model = AdvancedTransitionModel::new(3);
        model.build_context_tree(&sequence).unwrap();

        let anomalies = model.detect_advanced_anomalies(&sequence, 0.1);

        // Every complete window produces a score, so this must be non-empty.
        assert!(!anomalies.is_empty());

        for anomaly in &anomalies {
            println!("Advanced Anomaly Detected:");
            println!("  Sequence: {:?}", anomaly.state_sequence);
            println!("  Likelihood: {:.6}", anomaly.likelihood);
            println!(
                "  Information Score: {:.6}",
                anomaly.information_theoretic_score
            );
            println!("  Spectral Score: {:.6}", anomaly.spectral_anomaly_score);
            println!(
                "  Quantum Coherence: {:.6}",
                anomaly.quantum_coherence_measure
            );
            println!("  Confidence Interval: {:?}", anomaly.confidence_interval);
            println!(
                "  Topological Signature: {:?}",
                anomaly.topological_signature
            );
            println!();
        }
    }

    /// Building a model over a non-empty sequence must always populate the
    /// spectral decomposition; its summary values are printed for review.
    #[test]
    fn test_spectral_analysis() {
        let sequence: Vec<String> = vec!["A", "B", "A", "B", "A", "C", "A", "C"]
            .into_iter()
            .map(String::from)
            .collect();

        let mut model = AdvancedTransitionModel::new(2);
        model.build_context_tree(&sequence).unwrap();

        assert!(model.spectral_decomposition.is_some());

        let spectral = model.spectral_decomposition.as_ref().unwrap();
        println!("Spectral Analysis:");
        println!("  Mixing time: {:.4}", spectral.mixing_time);
        println!("  Spectral gap: {:.4}", spectral.spectral_gap);
        println!(
            "  Stationary distribution: {:?}",
            spectral.stationary_distribution
        );
    }
}
624pub mod func_tests;
625pub mod test_runner;