crate_activity/
pearson.rs

1crate::ix!();
2
/// Computes the Pearson correlation coefficient between two integer series.
///
/// The coefficient measures the strength of the linear relationship between
/// `x` and `y`; the returned value is always within `[-1.0, 1.0]`.
///
/// # Arguments
/// * `x` - first series of samples.
/// * `y` - second series of samples; must have the same length as `x`.
///
/// # Returns
/// The correlation coefficient, or `0.0` when the inputs are empty, differ in
/// length, or when either series has zero variance (which includes
/// single-element inputs), since the coefficient is undefined in those cases.
///
/// # Numerical behaviour
/// The computation is translation-invariant, so every sample is first
/// expressed as an exact offset from the first sample of its series, and the
/// offsets are then centred on their mean before being squared. This avoids
/// the catastrophic cancellation of the raw-sums formula
/// (`sum(x^2) - sum(x)^2 / n`) when values are large relative to their spread.
pub fn pearson_correlation(x: &[i64], y: &[i64]) -> f64 {
    if x.len() != y.len() || x.is_empty() {
        return 0.0; // Handle mismatched or empty data
    }

    let n = x.len() as f64;

    // Indexing is safe: both slices are non-empty past the guard above.
    // Offsets are taken in i128 so the subtraction is exact and cannot
    // overflow even for `i64::MIN` / `i64::MAX`.
    let x_origin = i128::from(x[0]);
    let y_origin = i128::from(y[0]);
    let x_offset = |value: i64| (i128::from(value) - x_origin) as f64;
    let y_offset = |value: i64| (i128::from(value) - y_origin) as f64;

    // First pass: means of the offset series.
    let mean_x = x.iter().map(|&value| x_offset(value)).sum::<f64>() / n;
    let mean_y = y.iter().map(|&value| y_offset(value)).sum::<f64>() / n;

    // Second pass: accumulate the co-moment and both sums of squared
    // deviations. Sums of squares cannot go negative, so the square root
    // below never produces NaN.
    let (co_moment, sq_dev_x, sq_dev_y) = x.iter().zip(y).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(co, sx, sy), (&xi, &yi)| {
            let dx = x_offset(xi) - mean_x;
            let dy = y_offset(yi) - mean_y;
            (co + dx * dy, sx + dx * dx, sy + dy * dy)
        },
    );

    let denominator = (sq_dev_x * sq_dev_y).sqrt();

    if denominator == 0.0 {
        0.0 // No correlation if either series is constant
    } else {
        // Rounding can push the ratio a hair outside the mathematical range.
        (co_moment / denominator).clamp(-1.0, 1.0)
    }
}
27
#[cfg(test)]
mod pearson_correlation_tests {
    use super::*;

    /// Absolute tolerance used when a test expects an exact coefficient of ±1.
    const TOLERANCE: f64 = 1e-9;

    /// Two empty series are degenerate and must yield zero.
    #[test]
    fn test_empty_inputs() {
        let nothing: [i64; 0] = [];
        assert_eq!(
            pearson_correlation(&nothing, &nothing),
            0.0,
            "Empty inputs should return 0.0."
        );
    }

    /// Series of different lengths cannot be paired up and must yield zero.
    #[test]
    fn test_mismatched_lengths() {
        let coefficient = pearson_correlation(&[1, 2, 3], &[1, 2]);
        assert_eq!(coefficient, 0.0, "Mismatched lengths should return 0.0.");
    }

    /// Both series constant at zero: no variance, so the result is zero.
    #[test]
    fn test_all_zeros() {
        let zeros = [0_i64; 3];
        assert_eq!(
            pearson_correlation(&zeros, &zeros),
            0.0,
            "All zeros should return 0.0."
        );
    }

    /// `y = 2x` is an exact increasing linear relationship.
    #[test]
    fn test_perfect_positive_correlation() {
        let coefficient = pearson_correlation(&[1, 2, 3], &[2, 4, 6]);
        assert!(
            (coefficient - 1.0).abs() < TOLERANCE,
            "Perfect positive correlation should return 1.0."
        );
    }

    /// `y = 8 - 2x` is an exact decreasing linear relationship.
    #[test]
    fn test_perfect_negative_correlation() {
        let coefficient = pearson_correlation(&[1, 2, 3], &[6, 4, 2]);
        assert!(
            (coefficient + 1.0).abs() < TOLERANCE,
            "Perfect negative correlation should return -1.0."
        );
    }

    /// A constant `y` has zero variance, which is reported as zero correlation.
    #[test]
    fn test_no_correlation() {
        let coefficient = pearson_correlation(&[1, 2, 3], &[2, 2, 2]);
        assert_eq!(coefficient, 0.0, "No correlation should return 0.0.");
    }

    /// One sample per series carries no variance information.
    #[test]
    fn test_single_element() {
        let coefficient = pearson_correlation(&[1], &[2]);
        assert_eq!(coefficient, 0.0, "Single-element inputs should return 0.0.");
    }

    /// Only the sign is checked here; the data itself is an exact decreasing
    /// line (`y = 11 - x`), so there is no actual noise in this fixture.
    #[test]
    fn test_high_variance_with_noise() {
        let ascending = [1, 2, 3, 4, 5];
        let descending = [10, 9, 8, 7, 6];
        assert!(
            pearson_correlation(&ascending, &descending) < 0.0,
            "Should return a negative correlation for this dataset."
        );
    }

    /// A thousand-element ramp correlated with an identical ramp.
    #[test]
    fn test_large_inputs() {
        let upper: i64 = 1000;
        let ramp: Vec<i64> = (1..=upper).collect();
        let same_ramp: Vec<i64> = (1..=upper).collect();

        let coefficient = pearson_correlation(&ramp, &same_ramp);
        assert!(
            (coefficient - 1.0).abs() < TOLERANCE,
            "Large identical ranges should have perfect correlation."
        );
    }

    /// Alternating signs in `y` should leave only a weak linear relationship.
    #[test]
    fn test_mixed_positive_and_negative_values() {
        let coefficient = pearson_correlation(&[1, 2, 3, 4, 5], &[1, -2, 3, -4, 5]);
        assert!(
            coefficient.abs() < 0.5,
            "Mixed positive and negative values should have low correlation."
        );
    }

    /// Odd numbers against the following even numbers: `y = x + 1`.
    #[test]
    fn test_uniformly_spaced_values() {
        let odds = [1, 3, 5, 7, 9];
        let evens = [2, 4, 6, 8, 10];
        assert!(
            (pearson_correlation(&odds, &evens) - 1.0).abs() < TOLERANCE,
            "Uniformly spaced values should have perfect positive correlation."
        );
    }

    /// Values that are large compared with their spread.
    #[test]
    fn test_precision_sensitivity() {
        let around_one_million = [1_000_000, 1_000_001, 1_000_002];
        let around_two_million = [2_000_000, 2_000_001, 2_000_002];
        let coefficient = pearson_correlation(&around_one_million, &around_two_million);
        assert!(
            (coefficient - 1.0).abs() < TOLERANCE,
            "Close values should still yield perfect positive correlation."
        );
    }

    /// Random base values plus a small random offset should stay strongly
    /// correlated with the base values.
    #[test]
    fn test_randomized_datasets() {
        use rand::Rng;

        let mut rng = rand::thread_rng();
        // All base values are drawn before any offset, as two separate passes.
        let base: Vec<i64> = (0..100).map(|_| rng.gen_range(0..1000)).collect();
        let shifted: Vec<i64> = base.iter().map(|&b| b + rng.gen_range(0..10)).collect();

        let coefficient = pearson_correlation(&base, &shifted);
        assert!(
            coefficient > 0.9,
            "Randomized correlated datasets should have high positive correlation."
        );
    }
}