use statrs;
use statrs::distribution::{FisherSnedecor, ContinuousCDF, StudentsT};
/// Computes the residuals `e_i = y_i - (beta_0 + beta_1 * x_i)` of a fitted
/// simple linear regression line.
///
/// `x` and `y` are paired observations; `beta_0` / `beta_1` are the fitted
/// intercept and slope. Extra elements in the longer slice are ignored
/// (`zip` stops at the shorter one).
pub fn calculate_residuals(x: &[f64], y: &[f64], beta_0: f64, beta_1: f64) -> Vec<f64> {
    // Taking &[f64] instead of &Vec<f64> is the idiomatic borrow; existing
    // call sites passing `&vec` still compile via deref coercion.
    x.iter()
        .zip(y)
        .map(|(&x_i, &y_i)| y_i - (beta_0 + beta_1 * x_i))
        .collect()
}
pub fn calculate_coefficients_t_and_p_values(x: &Vec<f64>, beta_0: f64, beta_1: f64, see: f64) -> ((f64, f64), (f64, f64)) {
let n: f64 = x.len() as f64;
let x_bar: f64 = x.iter().sum::<f64>() / n;
let sum_squared_x_minus_x_bar: f64 = x.iter().map(|&x_i| (x_i - x_bar).powi(2)).sum();
let se_beta_0: f64 = see * ((1.0 / n + x_bar.powi(2) / sum_squared_x_minus_x_bar).sqrt());
let se_beta_1: f64 = see / (sum_squared_x_minus_x_bar.sqrt());
let t_beta_0: f64 = beta_0 / se_beta_0;
let t_beta_1: f64 = beta_1 / se_beta_1;
let dof: f64 = n - 2.0; let t_dist: StudentsT = StudentsT::new(0.0, 1.0, dof).unwrap();
let p_beta_0: f64 = 2.0 * (1.0 - t_dist.cdf(t_beta_0.abs()));
let p_beta_1: f64 = 2.0 * (1.0 - t_dist.cdf(t_beta_1.abs()));
((t_beta_0, p_beta_0), (t_beta_1, p_beta_1))
}
pub fn calculate_f_statistic(x: &Vec<f64>, y: &Vec<f64>, beta_0: f64, beta_1: f64) -> (f64, f64) {
let n: f64 = x.len() as f64;
let p: f64 = 1.0; let y_bar: f64 = y.iter().sum::<f64>() / n;
let tss: f64 = y.iter().map(|&y_i| (y_i - y_bar).powi(2)).sum();
let rss: f64 = x.iter().zip(y.iter())
.map(|(&x_i, &y_i)| {
let y_hat_i = beta_0 + beta_1 * x_i;
(y_i - y_hat_i).powi(2)
}).sum();
let f_statistic: f64 = ((tss - rss) / p) / (rss / (n - p - 1.0));
let dof1: f64 = p as f64; let dof2: f64 = n - p - 1.0; let f_dist: FisherSnedecor = FisherSnedecor::new(dof1, dof2).unwrap();
let p_value: f64 = 1.0 - f_dist.cdf(f_statistic);
(f_statistic, p_value)
}
/// Computes the standard error of the estimate (residual standard error) for
/// a fitted simple linear regression line.
///
/// Divides the residual sum of squares by n - 2 degrees of freedom (two
/// parameters estimated: intercept and slope) and takes the square root.
///
/// NOTE(review): yields NaN or infinity when `x.len() <= 2` (zero or
/// negative divisor) — confirm callers supply more than two observations.
pub fn calculate_see(x: &[f64], y: &[f64], beta_0: f64, beta_1: f64) -> f64 {
    let n = x.len() as f64;
    // Residual sum of squares: sum of (y_i - y_hat_i)^2.
    let rss: f64 = x
        .iter()
        .zip(y)
        .map(|(&x_i, &y_i)| (y_i - (beta_0 + beta_1 * x_i)).powi(2))
        .sum();
    (rss / (n - 2.0)).sqrt()
}
/// Computes the coefficient of determination (R²) between two series as the
/// square of the Pearson correlation coefficient.
///
/// Uses the single-pass computational formula for r built from the raw sums
/// of x, y, x², y², and x·y.
///
/// NOTE(review): returns NaN when either series is empty or has zero
/// variance (division by zero in the denominator) — confirm callers guard
/// against constant input.
pub fn calculate_r_squared(x: &[f64], y: &[f64]) -> f64 {
    let n = x.len() as f64;
    let sum_x: f64 = x.iter().sum();
    let sum_y: f64 = y.iter().sum();
    let sum_xx: f64 = x.iter().map(|&v| v * v).sum();
    let sum_yy: f64 = y.iter().map(|&v| v * v).sum();
    let sum_xy: f64 = x.iter().zip(y).map(|(&a, &b)| a * b).sum();
    // Pearson r: covariance scaled by the product of standard deviations,
    // each term multiplied through by n.
    let r = (n * sum_xy - sum_x * sum_y)
        / ((n * sum_xx - sum_x.powi(2)) * (n * sum_yy - sum_y.powi(2))).sqrt();
    r.powi(2)
}
/// Fits an ordinary-least-squares simple linear regression
/// `y = beta_0 + beta_1 * x`.
///
/// Returns `((beta_0, beta_1), residuals)` on success, with one residual per
/// observation.
///
/// # Errors
/// Returns `Err` when the inputs have different lengths, are empty, or when
/// every x value is identical (zero variance — the slope is undefined).
pub fn simple_linear_regression(x: &Vec<f64>, y: &Vec<f64>) -> Result<((f64, f64), Vec<f64>), String> {
    if x.len() != y.len() {
        return Err("Input vectors have different sizes".to_string());
    }
    // Explicit empty check so the caller gets a precise message instead of
    // the misleading zero-variance error the n == 0 case used to hit.
    if x.is_empty() {
        return Err("Input vectors are empty".to_string());
    }
    let n = x.len() as f64;
    let sum_x: f64 = x.iter().sum();
    let sum_y: f64 = y.iter().sum();
    let sum_xx: f64 = x.iter().map(|&v| v * v).sum();
    let sum_xy: f64 = x.iter().zip(y.iter()).map(|(&a, &b)| a * b).sum();
    // n * S_xx; zero exactly when all x values are identical.
    let denominator = n * sum_xx - sum_x.powi(2);
    // `f64::EPSILON` is the modern path; `std::f64::EPSILON` is soft-deprecated.
    if denominator.abs() < f64::EPSILON {
        return Err("The variance of x values is zero".to_string());
    }
    let beta_1 = (n * sum_xy - sum_x * sum_y) / denominator;
    let beta_0 = sum_y / n - beta_1 * sum_x / n;
    // Residuals e_i = y_i - (beta_0 + beta_1 * x_i), computed inline so the
    // entry point is self-contained.
    let residuals: Vec<f64> = x
        .iter()
        .zip(y.iter())
        .map(|(&x_i, &y_i)| y_i - (beta_0 + beta_1 * x_i))
        .collect();
    Ok(((beta_0, beta_1), residuals))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::get_test_data;

    /// Smoke test: fitting the bundled test series must succeed, produce a
    /// positive slope, and yield one residual per observation.
    ///
    /// NOTE(review): `get_test_data` is defined elsewhere in the crate —
    /// presumably two positively correlated series, since the slope sign is
    /// asserted; confirm against its definition.
    #[test]
    fn tests_simple_linear_regression() {
        let (series_1, series_2) = get_test_data();
        let x: Vec<f64> = series_1;
        let y: Vec<f64> = series_2;
        // Leftover dbg! calls removed — they print on every test run.
        let ((_beta_0, beta_1), residuals) = simple_linear_regression(&x, &y).unwrap();
        assert!(beta_1 > 0.0);
        assert_eq!(residuals.len(), x.len());
    }
}