use crate::error::{Error, Result};
use crate::optimized::OptimizedDataFrame;
use crate::series::Series;
use std::cmp::Ordering;
#[cfg(test)]
mod tests {
    //! Unit tests for the regression metrics defined in the parent module.
    use super::*;

    /// Absolute tolerance used when comparing floating-point results.
    const TOLERANCE: f64 = 1e-9;

    /// Shared fixture: every prediction is off by 0.2 in absolute value.
    const Y_TRUE: [f64; 5] = [3.0, 5.0, 2.5, 7.0, 10.0];
    const Y_PRED: [f64; 5] = [2.8, 4.8, 2.7, 7.2, 9.8];

    /// Asserts that `actual` is within `TOLERANCE` of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < TOLERANCE,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    #[test]
    fn test_mean_squared_error() {
        let mse = mean_squared_error(&Y_TRUE, &Y_PRED).expect("operation should succeed");
        // Five squared errors of 0.04 each.
        assert_close(mse, 0.2 / 5.0);
    }

    #[test]
    fn test_mean_absolute_error() {
        let mae = mean_absolute_error(&Y_TRUE, &Y_PRED).expect("operation should succeed");
        assert_close(mae, 0.2);
    }

    #[test]
    fn test_root_mean_squared_error() {
        let rmse = root_mean_squared_error(&Y_TRUE, &Y_PRED).expect("operation should succeed");
        // sqrt(0.04)
        assert_close(rmse, 0.2);
    }

    #[test]
    fn test_r2_score() {
        let r2 = r2_score(&Y_TRUE, &Y_PRED).expect("operation should succeed");
        assert!(r2 > 0.99);
        // Residual sum of squares is 0.2, total sum of squares is 38.
        assert_close(r2, 1.0 - 0.2 / 38.0);
    }

    #[test]
    fn test_explained_variance_score() {
        let ev = explained_variance_score(&Y_TRUE, &Y_PRED).expect("operation should succeed");
        // Residual variance is 0.0384, variance of the true values is 7.6.
        assert_close(ev, 1.0 - 0.0384 / 7.6);
    }

    #[test]
    fn test_perfect_prediction() {
        assert_close(
            mean_squared_error(&Y_TRUE, &Y_TRUE).expect("operation should succeed"),
            0.0,
        );
        assert_close(
            mean_absolute_error(&Y_TRUE, &Y_TRUE).expect("operation should succeed"),
            0.0,
        );
        assert_close(
            root_mean_squared_error(&Y_TRUE, &Y_TRUE).expect("operation should succeed"),
            0.0,
        );
        assert_close(
            r2_score(&Y_TRUE, &Y_TRUE).expect("operation should succeed"),
            1.0,
        );
        assert_close(
            explained_variance_score(&Y_TRUE, &Y_TRUE).expect("operation should succeed"),
            1.0,
        );
    }

    #[test]
    fn test_constant_target() {
        // With a constant target the denominator of both scores is zero, so
        // the functions fall back to 1.0 (no residual) or 0.0 (some residual).
        let constant = vec![2.0, 2.0, 2.0];
        let varying = vec![1.0, 2.0, 3.0];
        let shifted = vec![3.0, 3.0, 3.0];

        assert_close(
            r2_score(&constant, &constant).expect("operation should succeed"),
            1.0,
        );
        assert_close(
            r2_score(&constant, &varying).expect("operation should succeed"),
            0.0,
        );
        assert_close(
            explained_variance_score(&constant, &constant).expect("operation should succeed"),
            1.0,
        );
        assert_close(
            explained_variance_score(&constant, &varying).expect("operation should succeed"),
            0.0,
        );

        // A constant offset leaves no residual variance, so explained variance
        // stays at 1.0 even though R² drops to 0.0.
        assert_close(
            explained_variance_score(&constant, &shifted).expect("operation should succeed"),
            1.0,
        );
        assert_close(
            r2_score(&constant, &shifted).expect("operation should succeed"),
            0.0,
        );
    }

    #[test]
    fn test_empty_input() {
        let empty: Vec<f64> = vec![];

        let mse_result = mean_squared_error(&empty, &empty);
        assert!(matches!(mse_result, Err(Error::InvalidOperation(_))));
        let mae_result = mean_absolute_error(&empty, &empty);
        assert!(matches!(mae_result, Err(Error::InvalidOperation(_))));
        let rmse_result = root_mean_squared_error(&empty, &empty);
        assert!(matches!(rmse_result, Err(Error::InvalidOperation(_))));
        let r2_result = r2_score(&empty, &empty);
        assert!(matches!(r2_result, Err(Error::InvalidOperation(_))));
        let ev_result = explained_variance_score(&empty, &empty);
        assert!(matches!(ev_result, Err(Error::InvalidOperation(_))));
    }

    #[test]
    fn test_different_length() {
        let y_true = vec![1.0, 2.0, 3.0];
        let y_pred = vec![1.0, 2.0];

        let mse_result = mean_squared_error(&y_true, &y_pred);
        assert!(matches!(mse_result, Err(Error::DimensionMismatch(_))));
        let mae_result = mean_absolute_error(&y_true, &y_pred);
        assert!(matches!(mae_result, Err(Error::DimensionMismatch(_))));
        let rmse_result = root_mean_squared_error(&y_true, &y_pred);
        assert!(matches!(rmse_result, Err(Error::DimensionMismatch(_))));
        let r2_result = r2_score(&y_true, &y_pred);
        assert!(matches!(r2_result, Err(Error::DimensionMismatch(_))));
        let ev_result = explained_variance_score(&y_true, &y_pred);
        assert!(matches!(ev_result, Err(Error::DimensionMismatch(_))));
    }
}
pub fn mean_squared_error(y_true: &[f64], y_pred: &[f64]) -> Result<f64> {
if y_true.len() != y_pred.len() {
return Err(Error::DimensionMismatch(format!(
"Length mismatch between true and predicted values: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(Error::InvalidOperation(
"Cannot calculate with empty data".to_string(),
));
}
let sum_squared_error = y_true
.iter()
.zip(y_pred.iter())
.map(|(&true_val, &pred_val)| {
let error = true_val - pred_val;
error * error
})
.sum::<f64>();
Ok(sum_squared_error / y_true.len() as f64)
}
pub fn mean_absolute_error(y_true: &[f64], y_pred: &[f64]) -> Result<f64> {
if y_true.len() != y_pred.len() {
return Err(Error::DimensionMismatch(format!(
"Length mismatch between true and predicted values: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(Error::InvalidOperation(
"Cannot calculate with empty data".to_string(),
));
}
let sum_absolute_error = y_true
.iter()
.zip(y_pred.iter())
.map(|(&true_val, &pred_val)| (true_val - pred_val).abs())
.sum::<f64>();
Ok(sum_absolute_error / y_true.len() as f64)
}
/// Computes the root mean squared error: the square root of the value
/// returned by `mean_squared_error` for the same inputs.
///
/// # Errors
///
/// Propagates any error returned by `mean_squared_error`.
pub fn root_mean_squared_error(y_true: &[f64], y_pred: &[f64]) -> Result<f64> {
    mean_squared_error(y_true, y_pred).map(f64::sqrt)
}
pub fn r2_score(y_true: &[f64], y_pred: &[f64]) -> Result<f64> {
if y_true.len() != y_pred.len() {
return Err(Error::DimensionMismatch(format!(
"Length mismatch between true and predicted values: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(Error::InvalidOperation(
"Cannot calculate with empty data".to_string(),
));
}
let y_mean = y_true.iter().sum::<f64>() / y_true.len() as f64;
let ss_tot = y_true
.iter()
.map(|&true_val| {
let diff = true_val - y_mean;
diff * diff
})
.sum::<f64>();
let ss_res = y_true
.iter()
.zip(y_pred.iter())
.map(|(&true_val, &pred_val)| {
let error = true_val - pred_val;
error * error
})
.sum::<f64>();
if ss_tot == 0.0 {
if ss_res == 0.0 {
Ok(1.0) } else {
Ok(0.0) }
} else {
Ok(1.0 - (ss_res / ss_tot))
}
}
pub fn explained_variance_score(y_true: &[f64], y_pred: &[f64]) -> Result<f64> {
if y_true.len() != y_pred.len() {
return Err(Error::DimensionMismatch(format!(
"Length mismatch between true and predicted values: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(Error::InvalidOperation(
"Cannot calculate with empty data".to_string(),
));
}
let y_true_mean = y_true.iter().sum::<f64>() / y_true.len() as f64;
let y_pred_mean = y_pred.iter().sum::<f64>() / y_pred.len() as f64;
let var_y_true = y_true
.iter()
.map(|&val| {
let diff = val - y_true_mean;
diff * diff
})
.sum::<f64>()
/ y_true.len() as f64;
let var_residual = y_true
.iter()
.zip(y_pred.iter())
.map(|(&t, &p)| {
let residual = (t - p) - (y_true_mean - y_pred_mean);
residual * residual
})
.sum::<f64>()
/ y_true.len() as f64;
if var_y_true == 0.0 {
if var_residual == 0.0 {
Ok(1.0)
} else {
Ok(0.0)
}
} else {
Ok(1.0 - (var_residual / var_y_true))
}
}