use super::rounded::RoundToNthDigit;
#[derive(Debug)]
pub struct LinearResult {
pub slope: f64,
pub intercept: f64,
pub r2: f64,
}
pub trait LinearRegression {
fn single_linear(&self) -> LinearResult;
}
impl LinearRegression for [f64] {
fn single_linear(&self) -> LinearResult {
if self.is_empty() {
return LinearResult {
slope: 0.0,
intercept: 0.0,
r2: 0.0,
};
}
let sample_size = self.len() as f64;
let sum_x = (sample_size - 1.0) * sample_size / 2.0;
let sum_x_squared = (sample_size - 1.0) * sample_size * (2.0 * sample_size - 1.0) / 6.0;
let (sum_xy, sum_y) = self.iter().enumerate().fold((0.0, 0.0), |acc, (i, &y1)| {
(acc.0 + (i as f64) * y1, acc.1 + y1)
});
let denominator = sample_size * sum_x_squared - sum_x * sum_x;
if denominator == 0.0 {
return LinearResult {
slope: 0.0,
intercept: 0.0,
r2: 0.0,
};
}
let y_intercept = (1.0 / denominator) * (sum_x_squared * sum_y - sum_x * sum_xy);
let slope = (1.0 / denominator) * (sample_size * sum_xy - sum_x * sum_y);
let y_mean = sum_y / sample_size;
let (ss_tot, ss_err) = self.iter().enumerate().fold((0.0, 0.0), |acc, (i, &y1)| {
let y_diff = y1 - y_mean;
let predicted = slope * (i as f64) + y_intercept;
let err = y1 - predicted;
(acc.0 + y_diff * y_diff, acc.1 + err * err)
});
let rsq = 1.0 - ss_err / (ss_tot + 0.00001);
LinearResult {
slope: slope.round_to_4_digit(),
intercept: y_intercept.round_to_4_digit(),
r2: rsq.round_to_4_digit(),
}
}
}
pub fn pearson_corr(x: &[f64], y: &[f64]) -> Option<f64> {
if x.len() != y.len() || x.is_empty() {
return None;
}
let n = x.len();
let mean_x = x.iter().sum::<f64>() / n as f64;
let mean_y = y.iter().sum::<f64>() / n as f64;
let mut cov = 0.0;
let mut sum_x_sq = 0.0;
let mut sum_y_sq = 0.0;
for i in 0..n {
let diff_x = x[i] - mean_x;
let diff_y = y[i] - mean_y;
cov += diff_x * diff_y;
sum_x_sq += diff_x * diff_x;
sum_y_sq += diff_y * diff_y;
}
let denominator = (sum_x_sq * sum_y_sq).sqrt();
if denominator == 0.0 {
return None;
}
Some(cov / denominator)
}
pub fn spearman_rank_corr(x: &[f64], y: &[f64]) -> Option<f64> {
if x.len() != y.len() || x.is_empty() {
return None;
}
fn compute_ranks(data: &[f64]) -> Vec<f64> {
let mut indexed_data: Vec<(f64, usize)> =
data.iter().enumerate().map(|(i, &val)| (val, i)).collect();
indexed_data.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
let n = data.len();
let mut ranks = vec![0.0; n];
let mut i = 0;
while i < n {
let current_val = indexed_data[i].0;
let mut j = i;
while j < n && indexed_data[j].0 == current_val {
j += 1;
}
let avg_rank = (i + 1 + j) as f64 / 2.0;
for item in &indexed_data[i..j] {
let original_index = item.1;
ranks[original_index] = avg_rank;
}
i = j;
}
ranks
}
let x_ranks = compute_ranks(x);
let y_ranks = compute_ranks(y);
pearson_corr(&x_ranks, &y_ranks)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_single_linear() {
let y = [1.0, 2.0, 3.0, 4.0, 5.0];
let res2 = y.single_linear();
println!("{res2:?}");
}
#[test]
fn test_pearson_corr() {
let a = [1.0, 2.0, -3.0, -1.0, 5.0, 1.0, -7.0, 6.0, 16.0];
let b = [1.0, 2.0, -3.0, -1.0, 5.0, 1.0, -7.0, -6.0, 16.0];
assert_eq!(
pearson_corr(&a, &b).map(|x| x.round_to_4_digit()),
Some(0.8215)
)
}
#[test]
fn test_spearman_rank_corr() {
let a = [1.0, 2.0, -3.0, -1.0, 5.0, 1.0, -7.0, 6.0, 16.0];
let b = [1.0, 2.0, -3.0, -1.0, 5.0, 1.0, -7.0, -6.0, 16.0];
assert_eq!(
spearman_rank_corr(&a, &b).map(|x| x.round_to_4_digit()),
Some(0.6471)
)
}
}