1use numra_core::Scalar;
8
9use crate::descriptive;
10use crate::distributions::{student_t::StudentT, ContinuousDistribution};
11use crate::error::StatsError;
12
13#[derive(Clone, Debug)]
15pub struct RegressionResult<S: Scalar> {
16 pub coefficients: Vec<S>,
18 pub r_squared: S,
20 pub residuals: Vec<S>,
22 pub std_errors: Vec<S>,
24 pub p_values: Vec<S>,
26}
27
28pub fn linregress<S: Scalar>(x: &[S], y: &[S]) -> Result<RegressionResult<S>, StatsError> {
30 if x.len() != y.len() {
31 return Err(StatsError::LengthMismatch {
32 expected: x.len(),
33 got: y.len(),
34 });
35 }
36 if x.len() < 3 {
37 return Err(StatsError::EmptyData);
38 }
39 let n = x.len();
40 let ns = S::from_usize(n);
41 let mx = descriptive::mean(x)?;
42 let my = descriptive::mean(y)?;
43
44 let mut sxy = S::ZERO;
46 let mut sxx = S::ZERO;
47 for i in 0..n {
48 let dx = x[i] - mx;
49 sxy += dx * (y[i] - my);
50 sxx += dx * dx;
51 }
52 let slope = sxy / sxx;
53 let intercept = my - slope * mx;
54
55 let residuals: Vec<S> = (0..n).map(|i| y[i] - intercept - slope * x[i]).collect();
57
58 let ss_res: S = residuals.iter().fold(S::ZERO, |a, &r| a + r * r);
60 let ss_tot: S = y.iter().fold(S::ZERO, |a, &yi| a + (yi - my) * (yi - my));
61 let r_squared = S::ONE - ss_res / ss_tot;
62
63 let mse = ss_res / S::from_usize(n - 2);
65 let se_slope = (mse / sxx).sqrt();
66 let se_intercept = (mse * (S::ONE / ns + mx * mx / sxx)).sqrt();
67
68 let df = S::from_usize(n - 2);
70 let t_dist = StudentT::new(df);
71 let t_intercept = intercept / se_intercept;
72 let t_slope = slope / se_slope;
73 let p_intercept = S::TWO * (S::ONE - t_dist.cdf(t_intercept.abs()));
74 let p_slope = S::TWO * (S::ONE - t_dist.cdf(t_slope.abs()));
75
76 Ok(RegressionResult {
77 coefficients: vec![intercept, slope],
78 r_squared,
79 residuals,
80 std_errors: vec![se_intercept, se_slope],
81 p_values: vec![p_intercept, p_slope],
82 })
83}
84
85pub fn multiple_linregress<S: Scalar>(
90 x_vars: &[Vec<S>],
91 y: &[S],
92) -> Result<RegressionResult<S>, StatsError> {
93 if x_vars.is_empty() {
94 return Err(StatsError::EmptyData);
95 }
96 let n = y.len();
97 let p = x_vars.len() + 1; for v in x_vars {
99 if v.len() != n {
100 return Err(StatsError::LengthMismatch {
101 expected: n,
102 got: v.len(),
103 });
104 }
105 }
106 if n <= p {
107 return Err(StatsError::InvalidParameter(
108 "need more observations than predictors".into(),
109 ));
110 }
111
112 let mut xtx = vec![S::ZERO; p * p];
115 let mut xty = vec![S::ZERO; p];
116
117 for i in 0..n {
118 let mut xi = vec![S::ONE]; for v in x_vars {
120 xi.push(v[i]);
121 }
122 for r in 0..p {
123 for c in 0..p {
124 xtx[r * p + c] += xi[r] * xi[c];
125 }
126 xty[r] += xi[r] * y[i];
127 }
128 }
129
130 let beta = solve_linear_system::<S>(&xtx, &xty, p)?;
132
133 let my = descriptive::mean(y)?;
135 let mut residuals = Vec::with_capacity(n);
136 let mut ss_res = S::ZERO;
137 let mut ss_tot = S::ZERO;
138 for i in 0..n {
139 let mut y_hat = beta[0]; for (j, v) in x_vars.iter().enumerate() {
141 y_hat += beta[j + 1] * v[i];
142 }
143 let r = y[i] - y_hat;
144 residuals.push(r);
145 ss_res += r * r;
146 ss_tot += (y[i] - my) * (y[i] - my);
147 }
148 let r_squared = S::ONE - ss_res / ss_tot;
149
150 let mse = ss_res / S::from_usize(n - p);
152 let xtx_inv = invert_matrix::<S>(&xtx, p)?;
153 let std_errors: Vec<S> = (0..p).map(|i| (mse * xtx_inv[i * p + i]).sqrt()).collect();
154
155 let df = S::from_usize(n - p);
157 let t_dist = StudentT::new(df);
158 let p_values: Vec<S> = beta
159 .iter()
160 .zip(std_errors.iter())
161 .map(|(&b, &se)| {
162 let t = b / se;
163 S::TWO * (S::ONE - t_dist.cdf(t.abs()))
164 })
165 .collect();
166
167 Ok(RegressionResult {
168 coefficients: beta,
169 r_squared,
170 residuals,
171 std_errors,
172 p_values,
173 })
174}
175
176pub fn polyfit<S: Scalar>(x: &[S], y: &[S], degree: usize) -> Result<Vec<S>, StatsError> {
180 if x.len() != y.len() {
181 return Err(StatsError::LengthMismatch {
182 expected: x.len(),
183 got: y.len(),
184 });
185 }
186 let x_vars: Vec<Vec<S>> = (1..=degree)
188 .map(|d| x.iter().map(|&xi| xi.powi(d as i32)).collect())
189 .collect();
190 let result = multiple_linregress(&x_vars, y)?;
191 Ok(result.coefficients)
192}
193
194fn solve_linear_system<S: Scalar>(a: &[S], b: &[S], n: usize) -> Result<Vec<S>, StatsError> {
196 let mut aug = vec![S::ZERO; n * (n + 1)];
198 for i in 0..n {
199 for j in 0..n {
200 aug[i * (n + 1) + j] = a[i * n + j];
201 }
202 aug[i * (n + 1) + n] = b[i];
203 }
204
205 for col in 0..n {
207 let mut max_row = col;
209 let mut max_val = aug[col * (n + 1) + col].to_f64().abs();
210 for row in (col + 1)..n {
211 let val = aug[row * (n + 1) + col].to_f64().abs();
212 if val > max_val {
213 max_val = val;
214 max_row = row;
215 }
216 }
217 if max_val < 1e-14 {
218 return Err(StatsError::SingularMatrix);
219 }
220 if max_row != col {
222 for j in 0..=n {
223 aug.swap(col * (n + 1) + j, max_row * (n + 1) + j);
224 }
225 }
226 let pivot = aug[col * (n + 1) + col];
228 for row in (col + 1)..n {
229 let factor = aug[row * (n + 1) + col] / pivot;
230 for j in col..=n {
231 let val = aug[col * (n + 1) + j];
232 aug[row * (n + 1) + j] -= factor * val;
233 }
234 }
235 }
236
237 let mut x = vec![S::ZERO; n];
239 for i in (0..n).rev() {
240 let mut sum = aug[i * (n + 1) + n];
241 for j in (i + 1)..n {
242 sum -= aug[i * (n + 1) + j] * x[j];
243 }
244 x[i] = sum / aug[i * (n + 1) + i];
245 }
246 Ok(x)
247}
248
249fn invert_matrix<S: Scalar>(a: &[S], n: usize) -> Result<Vec<S>, StatsError> {
251 let mut aug = vec![S::ZERO; n * 2 * n];
253 for i in 0..n {
254 for j in 0..n {
255 aug[i * 2 * n + j] = a[i * n + j];
256 }
257 aug[i * 2 * n + n + i] = S::ONE;
258 }
259
260 let w = 2 * n;
261 for col in 0..n {
262 let mut max_row = col;
264 let mut max_val = aug[col * w + col].to_f64().abs();
265 for row in (col + 1)..n {
266 let val = aug[row * w + col].to_f64().abs();
267 if val > max_val {
268 max_val = val;
269 max_row = row;
270 }
271 }
272 if max_val < 1e-14 {
273 return Err(StatsError::SingularMatrix);
274 }
275 if max_row != col {
276 for j in 0..w {
277 aug.swap(col * w + j, max_row * w + j);
278 }
279 }
280 let pivot = aug[col * w + col];
282 for j in 0..w {
283 aug[col * w + j] /= pivot;
284 }
285 for row in 0..n {
287 if row == col {
288 continue;
289 }
290 let factor = aug[row * w + col];
291 for j in 0..w {
292 let val = aug[col * w + j];
293 aug[row * w + j] -= factor * val;
294 }
295 }
296 }
297
298 let mut inv = vec![S::ZERO; n * n];
300 for i in 0..n {
301 for j in 0..n {
302 inv[i * n + j] = aug[i * w + n + j];
303 }
304 }
305 Ok(inv)
306}
307
308#[cfg(test)]
309mod tests {
310 use super::*;
311
312 #[test]
313 fn test_linregress_perfect_line() {
314 let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
316 let y: Vec<f64> = x.iter().map(|&xi| 2.0 + 3.0 * xi).collect();
317 let result = linregress(&x, &y).unwrap();
318 assert!((result.coefficients[0] - 2.0).abs() < 1e-10);
319 assert!((result.coefficients[1] - 3.0).abs() < 1e-10);
320 assert!((result.r_squared - 1.0).abs() < 1e-10);
321 }
322
323 #[test]
324 fn test_linregress_with_noise() {
325 let x = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
326 let y = vec![2.1, 3.9, 6.2, 7.8, 10.1, 12.0, 13.9, 16.1, 18.0, 20.2];
327 let result = linregress(&x, &y).unwrap();
328 assert!(result.r_squared > 0.99);
329 assert!((result.coefficients[1] - 2.0).abs() < 0.2);
331 }
332
333 #[test]
334 fn test_multiple_linregress() {
335 let x1: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
337 let x2: Vec<f64> = vec![2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 5.0];
338 let y: Vec<f64> = x1
339 .iter()
340 .zip(x2.iter())
341 .map(|(&a, &b)| 1.0 + 2.0 * a + 3.0 * b)
342 .collect();
343 let result = multiple_linregress(&[x1, x2], &y).unwrap();
344 assert!((result.coefficients[0] - 1.0).abs() < 1e-8);
345 assert!((result.coefficients[1] - 2.0).abs() < 1e-8);
346 assert!((result.coefficients[2] - 3.0).abs() < 1e-8);
347 assert!((result.r_squared - 1.0).abs() < 1e-10);
348 }
349
350 #[test]
351 fn test_polyfit_quadratic() {
352 let x: Vec<f64> = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
354 let y: Vec<f64> = x.iter().map(|&xi| 1.0 + 2.0 * xi + 3.0 * xi * xi).collect();
355 let coeffs = polyfit(&x, &y, 2).unwrap();
356 assert!((coeffs[0] - 1.0).abs() < 1e-8, "c0 = {}", coeffs[0]);
357 assert!((coeffs[1] - 2.0).abs() < 1e-8, "c1 = {}", coeffs[1]);
358 assert!((coeffs[2] - 3.0).abs() < 1e-8, "c2 = {}", coeffs[2]);
359 }
360}