1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
use std::error::Error;

use nalgebra::DVector;

use crate::data::dataset::RealNumber;

/// A trait for computing regression metrics.
///
/// Every method has a default implementation, so an implementor only needs an empty `impl`
/// block to obtain them.
pub trait RegressionMetrics<T: RealNumber> {
    /// Computes the mean squared error (MSE) between the true values and the predicted values.
    ///
    /// # Arguments
    ///
    /// * `y_true` - The true values.
    /// * `y_pred` - The predicted values.
    ///
    /// # Returns
    ///
    /// The mean squared error.
    ///
    /// # Errors
    ///
    /// Returns an error if the lengths of `y_true` and `y_pred` are different, or if both
    /// vectors are empty (the mean of zero samples is undefined).
    ///
    /// # Panics
    ///
    /// Panics if the number of samples cannot be converted into `T`.
    fn mse(&self, y_true: &DVector<T>, y_pred: &DVector<T>) -> Result<T, Box<dyn Error>> {
        ensure_comparable(y_true, y_pred)?;

        let n = T::from_usize(y_true.len()).expect("sample count must be representable as T");
        let errors = y_pred - y_true;
        let errors_sq = errors.component_mul(&errors);

        Ok(errors_sq.sum() / n)
    }

    /// Computes the mean absolute error (MAE) between the true values and the predicted values.
    ///
    /// # Arguments
    ///
    /// * `y_true` - The true values.
    /// * `y_pred` - The predicted values.
    ///
    /// # Returns
    ///
    /// The mean absolute error.
    ///
    /// # Errors
    ///
    /// Returns an error if the lengths of `y_true` and `y_pred` are different, or if both
    /// vectors are empty (the mean of zero samples is undefined).
    ///
    /// # Panics
    ///
    /// Panics if the number of samples, or the constant `0.0`, cannot be converted into `T`.
    fn mae(&self, y_true: &DVector<T>, y_pred: &DVector<T>) -> Result<T, Box<dyn Error>> {
        ensure_comparable(y_true, y_pred)?;

        let n = T::from_usize(y_true.len()).expect("sample count must be representable as T");
        let zero = T::from_f64(0.0).expect("0.0 must be representable as T");
        let abs_errors_sum = y_pred
            .iter()
            .zip(y_true.iter())
            .map(|(&y_p, &y_t)| (y_p - y_t).abs())
            .fold(zero, |acc, x| acc + x);

        Ok(abs_errors_sum / n)
    }

    /// Computes the coefficient of determination (R^2) between the true values and the predicted values.
    ///
    /// The score is `1 - mse(y_true, y_pred) / mse(mean(y_true), y_true)`, i.e. the model's
    /// error relative to a baseline that always predicts the mean of `y_true`.
    ///
    /// # Arguments
    ///
    /// * `y_true` - The true values.
    /// * `y_pred` - The predicted values.
    ///
    /// # Returns
    ///
    /// The coefficient of determination (R^2).
    ///
    /// # Errors
    ///
    /// Returns an error if the lengths of `y_true` and `y_pred` are different, if both vectors
    /// are empty, or if the baseline error is exactly zero (for example a single sample, or
    /// identical true values), because the ratio is undefined in that case.
    ///
    /// # Panics
    ///
    /// Panics if the number of samples, or the constants `0.0` and `1.0`, cannot be converted
    /// into `T`.
    fn r2(&self, y_true: &DVector<T>, y_pred: &DVector<T>) -> Result<T, Box<dyn Error>> {
        // Validate up front so the mean below is never computed over zero samples.
        ensure_comparable(y_true, y_pred)?;

        let n = T::from_usize(y_true.len()).expect("sample count must be representable as T");

        let y_true_mean = y_true.sum() / n;

        // Baseline "model": always predict the mean of the true values.
        let y_true_mean_vec = DVector::from_element(y_true.len(), y_true_mean);

        let mse_model = self.mse(y_true, y_pred)?;
        let mse_base = self.mse(&y_true_mean_vec, y_true)?;

        // Dividing by a zero baseline would silently yield NaN or an infinity.
        if mse_base == T::from_f64(0.0).expect("0.0 must be representable as T") {
            return Err("R^2 is undefined when the true values have zero variance.".into());
        }

        Ok(T::from_f64(1.0).expect("1.0 must be representable as T") - (mse_model / mse_base))
    }
}

/// Checks that `y_true` and `y_pred` can be compared element-wise by a regression metric.
///
/// Returns an error when the vectors differ in length or contain no samples at all.
fn ensure_comparable<T: RealNumber>(
    y_true: &DVector<T>,
    y_pred: &DVector<T>,
) -> Result<(), Box<dyn Error>> {
    if y_true.len() != y_pred.len() {
        return Err("Predictions and labels are of different sizes.".into());
    }
    // Lengths are equal at this point, so checking one vector covers both.
    if y_true.is_empty() {
        return Err("Cannot compute regression metrics on empty inputs.".into());
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use nalgebra::DVector;

    /// Largest absolute deviation tolerated when comparing floating-point results.
    const TOLERANCE: f64 = 1e-6;

    /// Unit type that relies entirely on the default methods of `RegressionMetrics`.
    struct MockRegressor;

    impl RegressionMetrics<f64> for MockRegressor {}

    /// Builds the `(y_true, y_pred)` pair shared by the metric tests.
    fn sample_vectors() -> (DVector<f64>, DVector<f64>) {
        (
            DVector::from_vec(vec![1.0, 2.0, 3.0]),
            DVector::from_vec(vec![1.1, 1.9, 3.2]),
        )
    }

    #[test]
    fn test_mse() {
        let (labels, predictions) = sample_vectors();

        let actual = MockRegressor.mse(&labels, &predictions).unwrap();

        // Absolute residuals of the fixture are 0.1, 0.1 and 0.2.
        let squared_sum: f64 = [0.1_f64, 0.1, 0.2].iter().map(|e| e * e).sum();
        let expected = squared_sum / 3.0;

        assert!((actual - expected).abs() < TOLERANCE);
    }

    #[test]
    fn test_mae() {
        let (labels, predictions) = sample_vectors();

        let actual = MockRegressor.mae(&labels, &predictions).unwrap();

        // Absolute residuals of the fixture are 0.1, 0.1 and 0.2.
        let absolute_sum: f64 = [0.1_f64, 0.1, 0.2].iter().sum();
        let expected = absolute_sum / 3.0;

        assert!((actual - expected).abs() < TOLERANCE);
    }

    #[test]
    fn test_r2() {
        let (labels, predictions) = sample_vectors();

        let actual = MockRegressor.r2(&labels, &predictions).unwrap();

        // Recompute R^2 independently from the total and residual sums of squares.
        let mean = labels.mean();
        let mut total_ss = 0.0_f64;
        let mut residual_ss = 0.0_f64;
        for (&truth, &guess) in labels.iter().zip(predictions.iter()) {
            total_ss += (truth - mean).powi(2);
            residual_ss += (truth - guess).powi(2);
        }
        let expected = 1.0 - (residual_ss / total_ss);

        assert!((actual - expected).abs() < TOLERANCE);
    }

    #[test]
    fn test_different_length_error() {
        let labels = DVector::from_vec(vec![1.0, 2.0, 3.0]);
        let predictions = DVector::from_vec(vec![1.1, 1.9]);

        // Every metric must reject vectors whose lengths disagree.
        let outcomes = [
            MockRegressor.mse(&labels, &predictions),
            MockRegressor.mae(&labels, &predictions),
            MockRegressor.r2(&labels, &predictions),
        ];

        assert!(outcomes.iter().all(|outcome| outcome.is_err()));
    }
}