1use ghostflow_core::Tensor;
4
/// Bayesian ridge regression with a single spherical Gaussian prior on
/// the coefficient vector.
///
/// The noise precision `alpha` and weight precision `lambda` are
/// estimated from the data during `fit`; the `alpha_1`/`alpha_2` and
/// `lambda_1`/`lambda_2` fields are the shape/rate parameters of the
/// Gamma hyperpriors placed on those precisions.
pub struct BayesianRidge {
    /// Maximum number of evidence-maximization iterations.
    pub n_iter: usize,
    /// Convergence threshold on the per-iteration change of the two precisions.
    pub tol: f32,
    /// Gamma-prior shape parameter for the noise precision `alpha`.
    pub alpha_1: f32,
    /// Gamma-prior rate parameter for the noise precision `alpha`.
    pub alpha_2: f32,
    /// Gamma-prior shape parameter for the weight precision `lambda`.
    pub lambda_1: f32,
    /// Gamma-prior rate parameter for the weight precision `lambda`.
    pub lambda_2: f32,
    /// Whether to center the data and recover an intercept after fitting.
    pub fit_intercept: bool,
    // Learned state, populated by `fit`:
    coef_: Option<Vec<f32>>,   // posterior mean of the weights
    intercept_: Option<f32>,   // recovered intercept (0.0 when disabled)
    alpha_: Option<f32>,       // estimated noise precision
    lambda_: Option<f32>,      // estimated weight precision
    sigma_: Option<Vec<f32>>,  // covariance-related matrix from the final fit iteration (row-major n_features x n_features; exact scaling — see `fit`)
    n_iter_: usize,            // iterations actually run before convergence/stop
}
21
22impl BayesianRidge {
23 pub fn new() -> Self {
24 BayesianRidge {
25 n_iter: 300,
26 tol: 1e-3,
27 alpha_1: 1e-6,
28 alpha_2: 1e-6,
29 lambda_1: 1e-6,
30 lambda_2: 1e-6,
31 fit_intercept: true,
32 coef_: None,
33 intercept_: None,
34 alpha_: None,
35 lambda_: None,
36 sigma_: None,
37 n_iter_: 0,
38 }
39 }
40
41 pub fn n_iter(mut self, n: usize) -> Self {
42 self.n_iter = n;
43 self
44 }
45
46 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
47 let x_data = x.data_f32();
48 let y_data = y.data_f32();
49 let n_samples = x.dims()[0];
50 let n_features = x.dims()[1];
51
52 let (x_centered, y_centered, x_mean, y_mean) = if self.fit_intercept {
54 let x_mean: Vec<f32> = (0..n_features)
55 .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
56 .collect();
57 let y_mean = y_data.iter().sum::<f32>() / n_samples as f32;
58
59 let x_centered: Vec<f32> = (0..n_samples)
60 .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>())
61 .collect();
62 let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();
63
64 (x_centered, y_centered, x_mean, y_mean)
65 } else {
66 (x_data.clone(), y_data.clone(), vec![0.0; n_features], 0.0)
67 };
68
69 let mut alpha = 1.0f32; let mut lambda = 1.0f32; let mut xtx = vec![0.0f32; n_features * n_features];
75 for i in 0..n_features {
76 for j in 0..n_features {
77 for k in 0..n_samples {
78 xtx[i * n_features + j] += x_centered[k * n_features + i] * x_centered[k * n_features + j];
79 }
80 }
81 }
82
83 let mut xty = vec![0.0f32; n_features];
85 for i in 0..n_features {
86 for k in 0..n_samples {
87 xty[i] += x_centered[k * n_features + i] * y_centered[k];
88 }
89 }
90
91 let mut coef = vec![0.0f32; n_features];
92 let mut sigma = vec![0.0f32; n_features * n_features];
93
94 for iter in 0..self.n_iter {
95 let alpha_old = alpha;
96 let lambda_old = lambda;
97
98 let mut a = xtx.clone();
100 for i in 0..n_features {
101 a[i * n_features + i] += lambda / alpha;
102 }
103
104 sigma = self.invert_matrix(&a, n_features);
106
107 for i in 0..n_features {
109 coef[i] = 0.0;
110 for j in 0..n_features {
111 coef[i] += sigma[i * n_features + j] * xty[j];
112 }
113 }
114
115 let mut residual_sum = 0.0f32;
117 for i in 0..n_samples {
118 let mut pred = 0.0f32;
119 for j in 0..n_features {
120 pred += x_centered[i * n_features + j] * coef[j];
121 }
122 residual_sum += (y_centered[i] - pred).powi(2);
123 }
124
125 let mut gamma = 0.0f32;
127 for i in 0..n_features {
128 let mut eigenvalue_approx = 0.0f32;
129 for j in 0..n_features {
130 eigenvalue_approx += xtx[i * n_features + j] * sigma[j * n_features + i];
131 }
132 gamma += alpha * eigenvalue_approx;
133 }
134 gamma = gamma.clamp(0.0, n_features as f32);
135
136 alpha = (n_samples as f32 - gamma + 2.0 * self.alpha_1) /
138 (residual_sum + 2.0 * self.alpha_2);
139
140 let coef_sq_sum: f32 = coef.iter().map(|&c| c * c).sum();
141 lambda = (gamma + 2.0 * self.lambda_1) / (coef_sq_sum + 2.0 * self.lambda_2);
142
143 self.n_iter_ = iter + 1;
144
145 if (alpha - alpha_old).abs() < self.tol && (lambda - lambda_old).abs() < self.tol {
147 break;
148 }
149 }
150
151 let intercept = if self.fit_intercept {
153 let mut int = y_mean;
154 for j in 0..n_features {
155 int -= coef[j] * x_mean[j];
156 }
157 int
158 } else {
159 0.0
160 };
161
162 self.coef_ = Some(coef);
163 self.intercept_ = Some(intercept);
164 self.alpha_ = Some(alpha);
165 self.lambda_ = Some(lambda);
166 self.sigma_ = Some(sigma);
167 }
168
169 fn invert_matrix(&self, a: &[f32], n: usize) -> Vec<f32> {
170 let mut l = vec![0.0f32; n * n];
171
172 for i in 0..n {
174 for j in 0..=i {
175 let mut sum = 0.0f32;
176 if i == j {
177 for k in 0..j {
178 sum += l[j * n + k] * l[j * n + k];
179 }
180 l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
181 } else {
182 for k in 0..j {
183 sum += l[i * n + k] * l[j * n + k];
184 }
185 l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
186 }
187 }
188 }
189
190 let mut l_inv = vec![0.0f32; n * n];
192 for i in 0..n {
193 l_inv[i * n + i] = 1.0 / l[i * n + i].max(1e-10);
194 for j in (i + 1)..n {
195 let mut sum = 0.0f32;
196 for k in i..j {
197 sum += l[j * n + k] * l_inv[k * n + i];
198 }
199 l_inv[j * n + i] = -sum / l[j * n + j].max(1e-10);
200 }
201 }
202
203 let mut inv = vec![0.0f32; n * n];
205 for i in 0..n {
206 for j in 0..n {
207 for k in 0..n {
208 inv[i * n + j] += l_inv[k * n + i] * l_inv[k * n + j];
209 }
210 }
211 }
212
213 inv
214 }
215
216 pub fn predict(&self, x: &Tensor) -> Tensor {
217 let x_data = x.data_f32();
218 let n_samples = x.dims()[0];
219 let n_features = x.dims()[1];
220
221 let coef = self.coef_.as_ref().expect("Model not fitted");
222 let intercept = self.intercept_.unwrap_or(0.0);
223
224 let predictions: Vec<f32> = (0..n_samples)
225 .map(|i| {
226 let mut pred = intercept;
227 for j in 0..n_features {
228 pred += coef[j] * x_data[i * n_features + j];
229 }
230 pred
231 })
232 .collect();
233
234 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
235 }
236
237 pub fn predict_with_std(&self, x: &Tensor) -> (Tensor, Tensor) {
238 let x_data = x.data_f32();
239 let n_samples = x.dims()[0];
240 let n_features = x.dims()[1];
241
242 let coef = self.coef_.as_ref().expect("Model not fitted");
243 let intercept = self.intercept_.unwrap_or(0.0);
244 let sigma = self.sigma_.as_ref().expect("Model not fitted");
245 let alpha = self.alpha_.unwrap_or(1.0);
246
247 let mut predictions = Vec::with_capacity(n_samples);
248 let mut stds = Vec::with_capacity(n_samples);
249
250 for i in 0..n_samples {
251 let xi = &x_data[i * n_features..(i + 1) * n_features];
252
253 let mut pred = intercept;
254 for j in 0..n_features {
255 pred += coef[j] * xi[j];
256 }
257 predictions.push(pred);
258
259 let mut var = 1.0 / alpha;
261 for j in 0..n_features {
262 for k in 0..n_features {
263 var += xi[j] * sigma[j * n_features + k] * xi[k];
264 }
265 }
266 stds.push(var.sqrt());
267 }
268
269 (
270 Tensor::from_slice(&predictions, &[n_samples]).unwrap(),
271 Tensor::from_slice(&stds, &[n_samples]).unwrap(),
272 )
273 }
274
275 pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
276 let predictions = self.predict(x);
277 let pred_data = predictions.data_f32();
278 let y_data = y.data_f32();
279
280 let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
281 let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
282 let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();
283
284 1.0 - ss_res / ss_tot.max(1e-10)
285 }
286}
287
288impl Default for BayesianRidge {
289 fn default() -> Self {
290 Self::new()
291 }
292}
293
294
/// Automatic Relevance Determination (ARD) regression.
///
/// Like `BayesianRidge`, but each coefficient gets its own precision
/// `lambda[i]`, so irrelevant features can be individually suppressed.
pub struct ARDRegression {
    /// Maximum number of evidence-maximization iterations.
    pub n_iter: usize,
    /// Convergence threshold on the change of the precisions.
    pub tol: f32,
    /// Gamma-prior shape parameter for the noise precision `alpha`.
    pub alpha_1: f32,
    /// Gamma-prior rate parameter for the noise precision `alpha`.
    pub alpha_2: f32,
    /// Gamma-prior shape parameter for the per-feature precisions.
    pub lambda_1: f32,
    /// Gamma-prior rate parameter for the per-feature precisions.
    pub lambda_2: f32,
    /// Upper cap applied to each per-feature precision during fitting.
    pub threshold_lambda: f32,
    /// Whether to center the data and recover an intercept after fitting.
    pub fit_intercept: bool,
    // Learned state, populated by `fit`:
    coef_: Option<Vec<f32>>,    // posterior mean of the weights
    intercept_: Option<f32>,    // recovered intercept (0.0 when disabled)
    alpha_: Option<f32>,        // estimated noise precision
    lambda_: Option<Vec<f32>>,  // per-feature weight precisions
    sigma_: Option<Vec<f32>>,   // covariance-related matrix from the final fit iteration (row-major; exact scaling — see `fit`)
    n_iter_: usize,             // iterations actually run before convergence/stop
}
312
313impl ARDRegression {
314 pub fn new() -> Self {
315 ARDRegression {
316 n_iter: 300,
317 tol: 1e-3,
318 alpha_1: 1e-6,
319 alpha_2: 1e-6,
320 lambda_1: 1e-6,
321 lambda_2: 1e-6,
322 threshold_lambda: 1e4,
323 fit_intercept: true,
324 coef_: None,
325 intercept_: None,
326 alpha_: None,
327 lambda_: None,
328 sigma_: None,
329 n_iter_: 0,
330 }
331 }
332
333 pub fn n_iter(mut self, n: usize) -> Self {
334 self.n_iter = n;
335 self
336 }
337
338 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
339 let x_data = x.data_f32();
340 let y_data = y.data_f32();
341 let n_samples = x.dims()[0];
342 let n_features = x.dims()[1];
343
344 let (x_centered, y_centered, x_mean, y_mean) = if self.fit_intercept {
346 let x_mean: Vec<f32> = (0..n_features)
347 .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
348 .collect();
349 let y_mean = y_data.iter().sum::<f32>() / n_samples as f32;
350
351 let x_centered: Vec<f32> = (0..n_samples)
352 .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>())
353 .collect();
354 let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();
355
356 (x_centered, y_centered, x_mean, y_mean)
357 } else {
358 (x_data.clone(), y_data.clone(), vec![0.0; n_features], 0.0)
359 };
360
361 let mut alpha = 1.0f32;
363 let mut lambda = vec![1.0f32; n_features]; let mut xtx = vec![0.0f32; n_features * n_features];
367 let mut xty = vec![0.0f32; n_features];
368
369 for i in 0..n_features {
370 for k in 0..n_samples {
371 xty[i] += x_centered[k * n_features + i] * y_centered[k];
372 }
373 for j in 0..n_features {
374 for k in 0..n_samples {
375 xtx[i * n_features + j] += x_centered[k * n_features + i] * x_centered[k * n_features + j];
376 }
377 }
378 }
379
380 let mut coef = vec![0.0f32; n_features];
381 let mut sigma = vec![0.0f32; n_features * n_features];
382
383 for iter in 0..self.n_iter {
384 let alpha_old = alpha;
385 let lambda_old = lambda.clone();
386
387 let mut a = xtx.clone();
389 for i in 0..n_features {
390 a[i * n_features + i] += lambda[i] / alpha;
391 }
392
393 sigma = self.invert_matrix(&a, n_features);
394
395 for i in 0..n_features {
397 coef[i] = 0.0;
398 for j in 0..n_features {
399 coef[i] += sigma[i * n_features + j] * xty[j];
400 }
401 }
402
403 let mut residual_sum = 0.0f32;
405 for i in 0..n_samples {
406 let mut pred = 0.0f32;
407 for j in 0..n_features {
408 pred += x_centered[i * n_features + j] * coef[j];
409 }
410 residual_sum += (y_centered[i] - pred).powi(2);
411 }
412
413 let mut gamma = 0.0f32;
414 for i in 0..n_features {
415 gamma += 1.0 - lambda[i] * sigma[i * n_features + i];
416 }
417 gamma = gamma.clamp(0.0, n_features as f32);
418
419 alpha = (n_samples as f32 - gamma + 2.0 * self.alpha_1) /
420 (residual_sum + 2.0 * self.alpha_2);
421
422 for i in 0..n_features {
424 let gamma_i = 1.0 - lambda[i] * sigma[i * n_features + i];
425 lambda[i] = (gamma_i + 2.0 * self.lambda_1) /
426 (coef[i] * coef[i] + 2.0 * self.lambda_2);
427
428 if lambda[i] > self.threshold_lambda {
430 lambda[i] = self.threshold_lambda;
431 }
432 }
433
434 self.n_iter_ = iter + 1;
435
436 let alpha_diff = (alpha - alpha_old).abs();
438 let lambda_diff: f32 = lambda.iter().zip(lambda_old.iter())
439 .map(|(&l, &lo)| (l - lo).abs())
440 .sum::<f32>() / n_features as f32;
441
442 if alpha_diff < self.tol && lambda_diff < self.tol {
443 break;
444 }
445 }
446
447 let intercept = if self.fit_intercept {
449 let mut int = y_mean;
450 for j in 0..n_features {
451 int -= coef[j] * x_mean[j];
452 }
453 int
454 } else {
455 0.0
456 };
457
458 self.coef_ = Some(coef);
459 self.intercept_ = Some(intercept);
460 self.alpha_ = Some(alpha);
461 self.lambda_ = Some(lambda);
462 self.sigma_ = Some(sigma);
463 }
464
465 fn invert_matrix(&self, a: &[f32], n: usize) -> Vec<f32> {
466 let mut l = vec![0.0f32; n * n];
467
468 for i in 0..n {
469 for j in 0..=i {
470 let mut sum = 0.0f32;
471 if i == j {
472 for k in 0..j {
473 sum += l[j * n + k] * l[j * n + k];
474 }
475 l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
476 } else {
477 for k in 0..j {
478 sum += l[i * n + k] * l[j * n + k];
479 }
480 l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
481 }
482 }
483 }
484
485 let mut l_inv = vec![0.0f32; n * n];
486 for i in 0..n {
487 l_inv[i * n + i] = 1.0 / l[i * n + i].max(1e-10);
488 for j in (i + 1)..n {
489 let mut sum = 0.0f32;
490 for k in i..j {
491 sum += l[j * n + k] * l_inv[k * n + i];
492 }
493 l_inv[j * n + i] = -sum / l[j * n + j].max(1e-10);
494 }
495 }
496
497 let mut inv = vec![0.0f32; n * n];
498 for i in 0..n {
499 for j in 0..n {
500 for k in 0..n {
501 inv[i * n + j] += l_inv[k * n + i] * l_inv[k * n + j];
502 }
503 }
504 }
505
506 inv
507 }
508
509 pub fn predict(&self, x: &Tensor) -> Tensor {
510 let x_data = x.data_f32();
511 let n_samples = x.dims()[0];
512 let n_features = x.dims()[1];
513
514 let coef = self.coef_.as_ref().expect("Model not fitted");
515 let intercept = self.intercept_.unwrap_or(0.0);
516
517 let predictions: Vec<f32> = (0..n_samples)
518 .map(|i| {
519 let mut pred = intercept;
520 for j in 0..n_features {
521 pred += coef[j] * x_data[i * n_features + j];
522 }
523 pred
524 })
525 .collect();
526
527 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
528 }
529
530 pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
531 let predictions = self.predict(x);
532 let pred_data = predictions.data_f32();
533 let y_data = y.data_f32();
534
535 let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
536 let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
537 let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();
538
539 1.0 - ss_res / ss_tot.max(1e-10)
540 }
541}
542
543impl Default for ARDRegression {
544 fn default() -> Self {
545 Self::new()
546 }
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// On noiseless data generated by y = 2x, Bayesian ridge should
    /// recover the linear map almost exactly, giving R^2 close to 1.
    #[test]
    fn test_bayesian_ridge() {
        let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let y = Tensor::from_slice(&[2.0, 4.0, 6.0, 8.0, 10.0], &[5]).unwrap();

        let mut br = BayesianRidge::new().n_iter(100);
        br.fit(&x, &y);

        let score = br.score(&x, &y);
        assert!(score > 0.9);
    }

    /// ARD with one informative and one all-zero feature: only checks
    /// that prediction produces an output of the expected shape.
    #[test]
    fn test_ard_regression() {
        let x = Tensor::from_slice(&[
            1.0, 0.0,
            2.0, 0.0,
            3.0, 0.0,
            4.0, 0.0,
        ], &[4, 2]).unwrap();
        let y = Tensor::from_slice(&[2.0, 4.0, 6.0, 8.0], &[4]).unwrap();

        let mut ard = ARDRegression::new().n_iter(100);
        ard.fit(&x, &y);

        let predictions = ard.predict(&x);
        assert_eq!(predictions.dims(), &[4]);
    }
}