sciforge_lib/maths/statistics/
regression.rs1use super::descriptive::{mean, sample_variance};
2
3pub fn linear_regression(x: &[f64], y: &[f64]) -> (f64, f64) {
4 let n = x.len() as f64;
5 let sx: f64 = x.iter().sum();
6 let sy: f64 = y.iter().sum();
7 let sxy: f64 = x.iter().zip(y).map(|(xi, yi)| xi * yi).sum();
8 let sxx: f64 = x.iter().map(|xi| xi * xi).sum();
9 let slope = (n * sxy - sx * sy) / (n * sxx - sx * sx);
10 let intercept = (sy - slope * sx) / n;
11 (slope, intercept)
12}
13
14pub fn standard_error_slope(x: &[f64], y: &[f64]) -> f64 {
15 let (slope, intercept) = linear_regression(x, y);
16 let n = x.len() as f64;
17 let residual_var: f64 = x
18 .iter()
19 .zip(y)
20 .map(|(xi, yi)| (yi - slope * xi - intercept).powi(2))
21 .sum::<f64>()
22 / (n - 2.0);
23 let x_var = sample_variance(x);
24 (residual_var / ((n - 1.0) * x_var)).sqrt()
25}
26
27pub fn r_squared(x: &[f64], y: &[f64]) -> f64 {
28 let (slope, intercept) = linear_regression(x, y);
29 let my = mean(y);
30 let ss_res: f64 = x
31 .iter()
32 .zip(y)
33 .map(|(xi, yi)| (yi - (slope * xi + intercept)).powi(2))
34 .sum();
35 let ss_tot: f64 = y.iter().map(|yi| (yi - my).powi(2)).sum();
36 1.0 - ss_res / ss_tot
37}
38
39pub fn polynomial_regression(x: &[f64], y: &[f64], degree: usize) -> Vec<f64> {
40 let n = degree + 1;
41 let m = x.len();
42 let mut ata = vec![vec![0.0; n]; n];
43 let mut atb = vec![0.0; n];
44
45 for k in 0..m {
46 let mut powers = vec![0.0; 2 * n];
47 powers[0] = 1.0;
48 for p in 1..2 * n {
49 powers[p] = powers[p - 1] * x[k];
50 }
51 for i in 0..n {
52 for j in i..n {
53 ata[i][j] += powers[i + j];
54 if i != j {
55 ata[j][i] += powers[i + j];
56 }
57 }
58 atb[i] += powers[i] * y[k];
59 }
60 }
61
62 let mut aug = vec![vec![0.0; n + 1]; n];
63 for i in 0..n {
64 for j in 0..n {
65 aug[i][j] = ata[i][j];
66 }
67 aug[i][n] = atb[i];
68 }
69 for col in 0..n {
70 let mut max_row = col;
71 for row in col + 1..n {
72 if aug[row][col].abs() > aug[max_row][col].abs() {
73 max_row = row;
74 }
75 }
76 aug.swap(col, max_row);
77 let pivot = aug[col][col];
78 if pivot.abs() < 1e-30 {
79 continue;
80 }
81 aug[col][col..=n].iter_mut().for_each(|v| *v /= pivot);
82 for row in 0..n {
83 if row == col {
84 continue;
85 }
86 let factor = aug[row][col];
87 let src = aug[col][col..=n].to_vec();
88 aug[row][col..=n]
89 .iter_mut()
90 .zip(&src)
91 .for_each(|(d, &s)| *d -= factor * s);
92 }
93 }
94 (0..n).map(|i| aug[i][n]).collect()
95}
96
97pub fn exponential_regression(x: &[f64], y: &[f64]) -> (f64, f64) {
98 let ln_y: Vec<f64> = y.iter().map(|yi| yi.ln()).collect();
99 let (slope, intercept) = linear_regression(x, &ln_y);
100 (intercept.exp(), slope)
101}
102
103pub fn multiple_linear_regression(x_matrix: &[Vec<f64>], y: &[f64]) -> Vec<f64> {
104 let m = y.len();
105 let n = x_matrix[0].len() + 1;
106 let mut ata = vec![vec![0.0; n]; n];
107 let mut atb = vec![0.0; n];
108
109 for k in 0..m {
110 let mut row = vec![1.0];
111 row.extend_from_slice(&x_matrix[k]);
112 for i in 0..n {
113 for j in 0..n {
114 ata[i][j] += row[i] * row[j];
115 }
116 atb[i] += row[i] * y[k];
117 }
118 }
119
120 let mut aug = vec![vec![0.0; n + 1]; n];
121 for i in 0..n {
122 for j in 0..n {
123 aug[i][j] = ata[i][j];
124 }
125 aug[i][n] = atb[i];
126 }
127 for col in 0..n {
128 let mut max_row = col;
129 for row in col + 1..n {
130 if aug[row][col].abs() > aug[max_row][col].abs() {
131 max_row = row;
132 }
133 }
134 aug.swap(col, max_row);
135 let pivot = aug[col][col];
136 if pivot.abs() < 1e-30 {
137 continue;
138 }
139 aug[col][col..=n].iter_mut().for_each(|v| *v /= pivot);
140 for row in 0..n {
141 if row == col {
142 continue;
143 }
144 let factor = aug[row][col];
145 let src = aug[col][col..=n].to_vec();
146 aug[row][col..=n]
147 .iter_mut()
148 .zip(&src)
149 .for_each(|(d, &s)| *d -= factor * s);
150 }
151 }
152 (0..n).map(|i| aug[i][n]).collect()
153}
154
155pub fn logarithmic_regression(x: &[f64], y: &[f64]) -> (f64, f64) {
156 let ln_x: Vec<f64> = x.iter().map(|xi| xi.ln()).collect();
157 linear_regression(&ln_x, y)
158}
159
160pub fn power_regression(x: &[f64], y: &[f64]) -> (f64, f64) {
161 let ln_x: Vec<f64> = x.iter().map(|xi| xi.ln()).collect();
162 let ln_y: Vec<f64> = y.iter().map(|yi| yi.ln()).collect();
163 let (slope, intercept) = linear_regression(&ln_x, &ln_y);
164 (intercept.exp(), slope)
165}
166
167pub fn adjusted_r_squared(x: &[f64], y: &[f64], p: usize) -> f64 {
168 let r2 = r_squared(x, y);
169 let n = x.len() as f64;
170 1.0 - (1.0 - r2) * (n - 1.0) / (n - p as f64 - 1.0)
171}
172
173pub fn residuals(x: &[f64], y: &[f64]) -> Vec<f64> {
174 let (slope, intercept) = linear_regression(x, y);
175 x.iter()
176 .zip(y)
177 .map(|(xi, yi)| yi - (slope * xi + intercept))
178 .collect()
179}
180
181pub fn sum_of_squared_residuals(x: &[f64], y: &[f64]) -> f64 {
182 residuals(x, y).iter().map(|r| r * r).sum()
183}
184
185pub fn mean_squared_error(x: &[f64], y: &[f64]) -> f64 {
186 sum_of_squared_residuals(x, y) / x.len() as f64
187}
188
189pub fn root_mean_squared_error(x: &[f64], y: &[f64]) -> f64 {
190 mean_squared_error(x, y).sqrt()
191}
192
193pub fn durbin_watson(x: &[f64], y: &[f64]) -> f64 {
194 let res = residuals(x, y);
195 let n = res.len();
196 if n < 2 {
197 return 0.0;
198 }
199 let num: f64 = (1..n).map(|i| (res[i] - res[i - 1]).powi(2)).sum();
200 let den: f64 = res.iter().map(|r| r * r).sum();
201 if den < 1e-30 {
202 return 0.0;
203 }
204 num / den
205}
206
207pub fn leverage_hat(x: &[f64]) -> Vec<f64> {
208 let n = x.len();
209 let mx = mean(x);
210 let sxx: f64 = x.iter().map(|xi| (xi - mx).powi(2)).sum();
211 x.iter()
212 .map(|xi| 1.0 / n as f64 + (xi - mx).powi(2) / sxx)
213 .collect()
214}
215
216pub fn cook_distance(x: &[f64], y: &[f64]) -> Vec<f64> {
217 let res = residuals(x, y);
218 let h = leverage_hat(x);
219 let mse = mean_squared_error(x, y);
220 let p = 2.0;
221 res.iter()
222 .zip(&h)
223 .map(|(r, hi)| r * r * hi / (p * mse * (1.0 - hi).powi(2)))
224 .collect()
225}
226
227pub fn aic(n: usize, k: usize, ssr: f64) -> f64 {
228 let nf = n as f64;
229 nf * (ssr / nf).ln() + 2.0 * k as f64
230}
231
232pub fn bic(n: usize, k: usize, ssr: f64) -> f64 {
233 let nf = n as f64;
234 nf * (ssr / nf).ln() + k as f64 * nf.ln()
235}
236
237pub fn ridge_regression(x_matrix: &[Vec<f64>], y: &[f64], lambda: f64) -> Vec<f64> {
238 let m = y.len();
239 let n = x_matrix[0].len() + 1;
240 let mut ata = vec![vec![0.0; n]; n];
241 let mut atb = vec![0.0; n];
242
243 for k in 0..m {
244 let mut row = vec![1.0];
245 row.extend_from_slice(&x_matrix[k]);
246 for i in 0..n {
247 for j in 0..n {
248 ata[i][j] += row[i] * row[j];
249 }
250 atb[i] += row[i] * y[k];
251 }
252 }
253
254 for (i, ata_i) in ata.iter_mut().enumerate().skip(1) {
255 ata_i[i] += lambda;
256 }
257
258 gauss_solve_augmented(&ata, &atb)
259}
260
261fn gauss_solve_augmented(ata: &[Vec<f64>], atb: &[f64]) -> Vec<f64> {
262 let n = atb.len();
263 let mut aug = vec![vec![0.0; n + 1]; n];
264 for (i, aug_i) in aug.iter_mut().enumerate() {
265 aug_i[..n].copy_from_slice(&ata[i]);
266 aug_i[n] = atb[i];
267 }
268 for col in 0..n {
269 let mut max_row = col;
270 for row in col + 1..n {
271 if aug[row][col].abs() > aug[max_row][col].abs() {
272 max_row = row;
273 }
274 }
275 aug.swap(col, max_row);
276 let pivot = aug[col][col];
277 if pivot.abs() < 1e-30 {
278 continue;
279 }
280 aug[col][col..=n].iter_mut().for_each(|v| *v /= pivot);
281 for row in 0..n {
282 if row == col {
283 continue;
284 }
285 let factor = aug[row][col];
286 let src = aug[col][col..=n].to_vec();
287 aug[row][col..=n]
288 .iter_mut()
289 .zip(&src)
290 .for_each(|(d, &s)| *d -= factor * s);
291 }
292 }
293 (0..n).map(|i| aug[i][n]).collect()
294}