Skip to main content

sciforge_lib/maths/statistics/
regression.rs

1use super::descriptive::{mean, sample_variance};
2
3pub fn linear_regression(x: &[f64], y: &[f64]) -> (f64, f64) {
4    let n = x.len() as f64;
5    let sx: f64 = x.iter().sum();
6    let sy: f64 = y.iter().sum();
7    let sxy: f64 = x.iter().zip(y).map(|(xi, yi)| xi * yi).sum();
8    let sxx: f64 = x.iter().map(|xi| xi * xi).sum();
9    let slope = (n * sxy - sx * sy) / (n * sxx - sx * sx);
10    let intercept = (sy - slope * sx) / n;
11    (slope, intercept)
12}
13
14pub fn standard_error_slope(x: &[f64], y: &[f64]) -> f64 {
15    let (slope, intercept) = linear_regression(x, y);
16    let n = x.len() as f64;
17    let residual_var: f64 = x
18        .iter()
19        .zip(y)
20        .map(|(xi, yi)| (yi - slope * xi - intercept).powi(2))
21        .sum::<f64>()
22        / (n - 2.0);
23    let x_var = sample_variance(x);
24    (residual_var / ((n - 1.0) * x_var)).sqrt()
25}
26
27pub fn r_squared(x: &[f64], y: &[f64]) -> f64 {
28    let (slope, intercept) = linear_regression(x, y);
29    let my = mean(y);
30    let ss_res: f64 = x
31        .iter()
32        .zip(y)
33        .map(|(xi, yi)| (yi - (slope * xi + intercept)).powi(2))
34        .sum();
35    let ss_tot: f64 = y.iter().map(|yi| (yi - my).powi(2)).sum();
36    1.0 - ss_res / ss_tot
37}
38
39pub fn polynomial_regression(x: &[f64], y: &[f64], degree: usize) -> Vec<f64> {
40    let n = degree + 1;
41    let m = x.len();
42    let mut ata = vec![vec![0.0; n]; n];
43    let mut atb = vec![0.0; n];
44
45    for k in 0..m {
46        let mut powers = vec![0.0; 2 * n];
47        powers[0] = 1.0;
48        for p in 1..2 * n {
49            powers[p] = powers[p - 1] * x[k];
50        }
51        for i in 0..n {
52            for j in i..n {
53                ata[i][j] += powers[i + j];
54                if i != j {
55                    ata[j][i] += powers[i + j];
56                }
57            }
58            atb[i] += powers[i] * y[k];
59        }
60    }
61
62    let mut aug = vec![vec![0.0; n + 1]; n];
63    for i in 0..n {
64        for j in 0..n {
65            aug[i][j] = ata[i][j];
66        }
67        aug[i][n] = atb[i];
68    }
69    for col in 0..n {
70        let mut max_row = col;
71        for row in col + 1..n {
72            if aug[row][col].abs() > aug[max_row][col].abs() {
73                max_row = row;
74            }
75        }
76        aug.swap(col, max_row);
77        let pivot = aug[col][col];
78        if pivot.abs() < 1e-30 {
79            continue;
80        }
81        aug[col][col..=n].iter_mut().for_each(|v| *v /= pivot);
82        for row in 0..n {
83            if row == col {
84                continue;
85            }
86            let factor = aug[row][col];
87            let src = aug[col][col..=n].to_vec();
88            aug[row][col..=n]
89                .iter_mut()
90                .zip(&src)
91                .for_each(|(d, &s)| *d -= factor * s);
92        }
93    }
94    (0..n).map(|i| aug[i][n]).collect()
95}
96
97pub fn exponential_regression(x: &[f64], y: &[f64]) -> (f64, f64) {
98    let ln_y: Vec<f64> = y.iter().map(|yi| yi.ln()).collect();
99    let (slope, intercept) = linear_regression(x, &ln_y);
100    (intercept.exp(), slope)
101}
102
103pub fn multiple_linear_regression(x_matrix: &[Vec<f64>], y: &[f64]) -> Vec<f64> {
104    let m = y.len();
105    let n = x_matrix[0].len() + 1;
106    let mut ata = vec![vec![0.0; n]; n];
107    let mut atb = vec![0.0; n];
108
109    for k in 0..m {
110        let mut row = vec![1.0];
111        row.extend_from_slice(&x_matrix[k]);
112        for i in 0..n {
113            for j in 0..n {
114                ata[i][j] += row[i] * row[j];
115            }
116            atb[i] += row[i] * y[k];
117        }
118    }
119
120    let mut aug = vec![vec![0.0; n + 1]; n];
121    for i in 0..n {
122        for j in 0..n {
123            aug[i][j] = ata[i][j];
124        }
125        aug[i][n] = atb[i];
126    }
127    for col in 0..n {
128        let mut max_row = col;
129        for row in col + 1..n {
130            if aug[row][col].abs() > aug[max_row][col].abs() {
131                max_row = row;
132            }
133        }
134        aug.swap(col, max_row);
135        let pivot = aug[col][col];
136        if pivot.abs() < 1e-30 {
137            continue;
138        }
139        aug[col][col..=n].iter_mut().for_each(|v| *v /= pivot);
140        for row in 0..n {
141            if row == col {
142                continue;
143            }
144            let factor = aug[row][col];
145            let src = aug[col][col..=n].to_vec();
146            aug[row][col..=n]
147                .iter_mut()
148                .zip(&src)
149                .for_each(|(d, &s)| *d -= factor * s);
150        }
151    }
152    (0..n).map(|i| aug[i][n]).collect()
153}
154
155pub fn logarithmic_regression(x: &[f64], y: &[f64]) -> (f64, f64) {
156    let ln_x: Vec<f64> = x.iter().map(|xi| xi.ln()).collect();
157    linear_regression(&ln_x, y)
158}
159
160pub fn power_regression(x: &[f64], y: &[f64]) -> (f64, f64) {
161    let ln_x: Vec<f64> = x.iter().map(|xi| xi.ln()).collect();
162    let ln_y: Vec<f64> = y.iter().map(|yi| yi.ln()).collect();
163    let (slope, intercept) = linear_regression(&ln_x, &ln_y);
164    (intercept.exp(), slope)
165}
166
167pub fn adjusted_r_squared(x: &[f64], y: &[f64], p: usize) -> f64 {
168    let r2 = r_squared(x, y);
169    let n = x.len() as f64;
170    1.0 - (1.0 - r2) * (n - 1.0) / (n - p as f64 - 1.0)
171}
172
173pub fn residuals(x: &[f64], y: &[f64]) -> Vec<f64> {
174    let (slope, intercept) = linear_regression(x, y);
175    x.iter()
176        .zip(y)
177        .map(|(xi, yi)| yi - (slope * xi + intercept))
178        .collect()
179}
180
181pub fn sum_of_squared_residuals(x: &[f64], y: &[f64]) -> f64 {
182    residuals(x, y).iter().map(|r| r * r).sum()
183}
184
185pub fn mean_squared_error(x: &[f64], y: &[f64]) -> f64 {
186    sum_of_squared_residuals(x, y) / x.len() as f64
187}
188
189pub fn root_mean_squared_error(x: &[f64], y: &[f64]) -> f64 {
190    mean_squared_error(x, y).sqrt()
191}
192
193pub fn durbin_watson(x: &[f64], y: &[f64]) -> f64 {
194    let res = residuals(x, y);
195    let n = res.len();
196    if n < 2 {
197        return 0.0;
198    }
199    let num: f64 = (1..n).map(|i| (res[i] - res[i - 1]).powi(2)).sum();
200    let den: f64 = res.iter().map(|r| r * r).sum();
201    if den < 1e-30 {
202        return 0.0;
203    }
204    num / den
205}
206
207pub fn leverage_hat(x: &[f64]) -> Vec<f64> {
208    let n = x.len();
209    let mx = mean(x);
210    let sxx: f64 = x.iter().map(|xi| (xi - mx).powi(2)).sum();
211    x.iter()
212        .map(|xi| 1.0 / n as f64 + (xi - mx).powi(2) / sxx)
213        .collect()
214}
215
216pub fn cook_distance(x: &[f64], y: &[f64]) -> Vec<f64> {
217    let res = residuals(x, y);
218    let h = leverage_hat(x);
219    let mse = mean_squared_error(x, y);
220    let p = 2.0;
221    res.iter()
222        .zip(&h)
223        .map(|(r, hi)| r * r * hi / (p * mse * (1.0 - hi).powi(2)))
224        .collect()
225}
226
227pub fn aic(n: usize, k: usize, ssr: f64) -> f64 {
228    let nf = n as f64;
229    nf * (ssr / nf).ln() + 2.0 * k as f64
230}
231
232pub fn bic(n: usize, k: usize, ssr: f64) -> f64 {
233    let nf = n as f64;
234    nf * (ssr / nf).ln() + k as f64 * nf.ln()
235}
236
237pub fn ridge_regression(x_matrix: &[Vec<f64>], y: &[f64], lambda: f64) -> Vec<f64> {
238    let m = y.len();
239    let n = x_matrix[0].len() + 1;
240    let mut ata = vec![vec![0.0; n]; n];
241    let mut atb = vec![0.0; n];
242
243    for k in 0..m {
244        let mut row = vec![1.0];
245        row.extend_from_slice(&x_matrix[k]);
246        for i in 0..n {
247            for j in 0..n {
248                ata[i][j] += row[i] * row[j];
249            }
250            atb[i] += row[i] * y[k];
251        }
252    }
253
254    for (i, ata_i) in ata.iter_mut().enumerate().skip(1) {
255        ata_i[i] += lambda;
256    }
257
258    gauss_solve_augmented(&ata, &atb)
259}
260
261fn gauss_solve_augmented(ata: &[Vec<f64>], atb: &[f64]) -> Vec<f64> {
262    let n = atb.len();
263    let mut aug = vec![vec![0.0; n + 1]; n];
264    for (i, aug_i) in aug.iter_mut().enumerate() {
265        aug_i[..n].copy_from_slice(&ata[i]);
266        aug_i[n] = atb[i];
267    }
268    for col in 0..n {
269        let mut max_row = col;
270        for row in col + 1..n {
271            if aug[row][col].abs() > aug[max_row][col].abs() {
272                max_row = row;
273            }
274        }
275        aug.swap(col, max_row);
276        let pivot = aug[col][col];
277        if pivot.abs() < 1e-30 {
278            continue;
279        }
280        aug[col][col..=n].iter_mut().for_each(|v| *v /= pivot);
281        for row in 0..n {
282            if row == col {
283                continue;
284            }
285            let factor = aug[row][col];
286            let src = aug[col][col..=n].to_vec();
287            aug[row][col..=n]
288                .iter_mut()
289                .zip(&src)
290                .for_each(|(d, &s)| *d -= factor * s);
291        }
292    }
293    (0..n).map(|i| aug[i][n]).collect()
294}