1use ghostflow_core::Tensor;
4use rand::prelude::*;
5
/// Linear regression robust to outliers via the Huber loss: residuals within
/// `epsilon` robust-scale units are penalized quadratically, larger ones linearly.
pub struct HuberRegressor {
    /// Threshold (in units of the robust scale) separating the quadratic and
    /// linear loss regions; 1.35 is the conventional default.
    pub epsilon: f32,
    /// Maximum number of IRLS iterations.
    pub max_iter: usize,
    /// L2 (ridge) regularization strength added to the normal equations.
    pub alpha: f32,
    /// Whether to center the data and fit an intercept term.
    pub fit_intercept: bool,
    /// Convergence tolerance on the largest coefficient change per iteration.
    pub tol: f32,
    // Learned coefficients; `None` until `fit` has been called.
    coef_: Option<Vec<f32>>,
    // Learned intercept (0.0 when `fit_intercept` is false).
    intercept_: f32,
    // Final robust scale (MAD-based) estimate of the residuals.
    scale_: f32,
    // Number of IRLS iterations actually performed.
    n_iter_: usize,
}
18
impl HuberRegressor {
    /// Creates a regressor with conventional defaults
    /// (epsilon = 1.35, alpha = 1e-4, intercept fitted, tol = 1e-5).
    pub fn new() -> Self {
        HuberRegressor {
            epsilon: 1.35,
            max_iter: 100,
            alpha: 0.0001,
            fit_intercept: true,
            tol: 1e-5,
            coef_: None,
            intercept_: 0.0,
            scale_: 1.0,
            n_iter_: 0,
        }
    }

    /// Builder-style setter for `epsilon` (outlier threshold in scale units).
    pub fn epsilon(mut self, eps: f32) -> Self {
        self.epsilon = eps;
        self
    }

    /// Builder-style setter for the L2 regularization strength `alpha`.
    pub fn alpha(mut self, alpha: f32) -> Self {
        self.alpha = alpha;
        self
    }

    /// Fits the model by iteratively reweighted least squares (IRLS):
    /// samples whose scaled residual exceeds `epsilon` are down-weighted,
    /// a weighted ridge system is re-solved, and the robust scale is
    /// re-estimated, until coefficients move less than `tol` or `max_iter`
    /// is reached.
    ///
    /// Assumes `x` is a `[n_samples, n_features]` row-major tensor and `y`
    /// is `[n_samples]` — TODO confirm against `Tensor`'s layout contract.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // When fitting an intercept, center x and y so the linear system only
        // solves for the slopes; the intercept is recovered from the means below.
        let (x_centered, y_centered, x_mean, y_mean) = if self.fit_intercept {
            let x_mean: Vec<f32> = (0..n_features)
                .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
                .collect();
            let y_mean = y_data.iter().sum::<f32>() / n_samples as f32;

            let x_c: Vec<f32> = (0..n_samples)
                .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>())
                .collect();
            let y_c: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();
            (x_c, y_c, x_mean, y_mean)
        } else {
            (x_data.clone(), y_data.clone(), vec![0.0; n_features], 0.0)
        };

        let mut coef = vec![0.0f32; n_features];

        // Initial robust scale: median absolute residual of the zero model,
        // divided by 0.6745 so the estimate is consistent with the standard
        // deviation under Gaussian noise (MAD estimator).
        let mut residuals: Vec<f32> = (0..n_samples)
            .map(|i| y_centered[i])
            .collect();
        residuals.sort_by(|a, b| a.abs().partial_cmp(&b.abs()).unwrap());
        let mut scale = residuals[n_samples / 2].abs() / 0.6745;
        scale = scale.max(1e-10); // floor avoids division by zero below

        for iter in 0..self.max_iter {
            let coef_old = coef.clone();

            // Huber IRLS weights: 1 inside the quadratic region,
            // epsilon / |r| in the linear (outlier) region.
            let mut weights = vec![0.0f32; n_samples];
            for i in 0..n_samples {
                let mut pred = 0.0f32;
                for j in 0..n_features {
                    pred += coef[j] * x_centered[i * n_features + j];
                }
                let residual = (y_centered[i] - pred) / scale;
                weights[i] = if residual.abs() <= self.epsilon {
                    1.0
                } else {
                    self.epsilon / residual.abs()
                };
            }

            // Weighted normal equations: (X^T W X + alpha I) coef = X^T W y.
            let mut xtx = vec![0.0f32; n_features * n_features];
            let mut xty = vec![0.0f32; n_features];

            for i in 0..n_features {
                for k in 0..n_samples {
                    xty[i] += weights[k] * x_centered[k * n_features + i] * y_centered[k];
                }
                for j in 0..n_features {
                    for k in 0..n_samples {
                        xtx[i * n_features + j] += weights[k] *
                            x_centered[k * n_features + i] * x_centered[k * n_features + j];
                    }
                }
                // Ridge term keeps the system well-conditioned.
                xtx[i * n_features + i] += self.alpha;
            }

            coef = solve_linear_system(&xtx, &xty, n_features);

            // Re-estimate the robust scale from the new residuals (MAD).
            let mut new_residuals: Vec<f32> = (0..n_samples)
                .map(|i| {
                    let mut pred = 0.0f32;
                    for j in 0..n_features {
                        pred += coef[j] * x_centered[i * n_features + j];
                    }
                    (y_centered[i] - pred).abs()
                })
                .collect();
            new_residuals.sort_by(|a, b| a.partial_cmp(b).unwrap());
            scale = new_residuals[n_samples / 2] / 0.6745;
            scale = scale.max(1e-10);

            self.n_iter_ = iter + 1;

            // Converged once no coefficient moved by more than `tol`.
            let max_change = coef.iter().zip(coef_old.iter())
                .map(|(&new, &old)| (new - old).abs())
                .fold(0.0f32, f32::max);
            if max_change < self.tol {
                break;
            }
        }

        // Recover the intercept from the centering means.
        self.intercept_ = if self.fit_intercept {
            y_mean - coef.iter().zip(x_mean.iter()).map(|(&c, &m)| c * m).sum::<f32>()
        } else {
            0.0
        };
        self.scale_ = scale;
        self.coef_ = Some(coef);
    }

    /// Predicts targets for `x` (`[n_samples, n_features]`).
    ///
    /// # Panics
    /// Panics if called before `fit`.
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];
        let coef = self.coef_.as_ref().expect("Model not fitted");

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let mut pred = self.intercept_;
                for j in 0..n_features {
                    pred += coef[j] * x_data[i * n_features + j];
                }
                pred
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }
}
166
167impl Default for HuberRegressor {
168 fn default() -> Self { Self::new() }
169}
170
/// RANSAC (RANdom SAmple Consensus) linear regression: repeatedly fits OLS on
/// small random subsets and keeps the model with the largest consensus set.
pub struct RANSACRegressor {
    /// Subset size per trial; defaults to `n_features + 1` when `None`.
    pub min_samples: Option<usize>,
    /// Absolute residual below which a sample counts as an inlier; defaults
    /// to twice the median absolute deviation of `y` when `None`.
    pub residual_threshold: Option<f32>,
    /// Number of random subsets to try.
    pub max_trials: usize,
    // Learned coefficients; `None` until `fit` has been called.
    coef_: Option<Vec<f32>>,
    // Learned intercept.
    intercept_: f32,
    // Inlier mask of the best trial; `None` until `fit` has been called.
    inlier_mask_: Option<Vec<bool>>,
}
180
181impl RANSACRegressor {
182 pub fn new() -> Self {
183 RANSACRegressor {
184 min_samples: None,
185 residual_threshold: None,
186 max_trials: 100,
187 coef_: None,
188 intercept_: 0.0,
189 inlier_mask_: None,
190 }
191 }
192
193 pub fn max_trials(mut self, n: usize) -> Self {
194 self.max_trials = n;
195 self
196 }
197
198 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
199 let x_data = x.data_f32();
200 let y_data = y.data_f32();
201 let n_samples = x.dims()[0];
202 let n_features = x.dims()[1];
203
204 let min_samples = self.min_samples.unwrap_or(n_features + 1);
205
206 let y_mean = y_data.iter().sum::<f32>() / n_samples as f32;
208 let mut deviations: Vec<f32> = y_data.iter().map(|&y| (y - y_mean).abs()).collect();
209 deviations.sort_by(|a, b| a.partial_cmp(b).unwrap());
210 let threshold = self.residual_threshold.unwrap_or(deviations[n_samples / 2] * 2.0);
211
212 let mut rng = thread_rng();
213 let mut best_coef = vec![0.0f32; n_features];
214 let mut best_intercept = 0.0f32;
215 let mut best_inliers = vec![false; n_samples];
216 let mut best_n_inliers = 0;
217
218 for _ in 0..self.max_trials {
219 let indices: Vec<usize> = (0..n_samples).choose_multiple(&mut rng, min_samples);
220
221 let x_sample: Vec<f32> = indices.iter()
223 .flat_map(|&i| (0..n_features).map(|j| x_data[i * n_features + j]).collect::<Vec<_>>())
224 .collect();
225 let y_sample: Vec<f32> = indices.iter().map(|&i| y_data[i]).collect();
226
227 let (coef, intercept) = fit_ols(&x_sample, &y_sample, min_samples, n_features);
228
229 let mut inliers = vec![false; n_samples];
231 let mut n_inliers = 0;
232 for i in 0..n_samples {
233 let mut pred = intercept;
234 for j in 0..n_features {
235 pred += coef[j] * x_data[i * n_features + j];
236 }
237 if (y_data[i] - pred).abs() <= threshold {
238 inliers[i] = true;
239 n_inliers += 1;
240 }
241 }
242
243 if n_inliers > best_n_inliers {
244 let inlier_indices: Vec<usize> = inliers.iter()
246 .enumerate().filter(|(_, &b)| b).map(|(i, _)| i).collect();
247
248 if inlier_indices.len() >= min_samples {
249 let x_inliers: Vec<f32> = inlier_indices.iter()
250 .flat_map(|&i| (0..n_features).map(|j| x_data[i * n_features + j]).collect::<Vec<_>>())
251 .collect();
252 let y_inliers: Vec<f32> = inlier_indices.iter().map(|&i| y_data[i]).collect();
253
254 let (c, int) = fit_ols(&x_inliers, &y_inliers, inlier_indices.len(), n_features);
255 best_coef = c;
256 best_intercept = int;
257 best_inliers = inliers;
258 best_n_inliers = n_inliers;
259 }
260 }
261 }
262
263 self.coef_ = Some(best_coef);
264 self.intercept_ = best_intercept;
265 self.inlier_mask_ = Some(best_inliers);
266 }
267
268 pub fn predict(&self, x: &Tensor) -> Tensor {
269 let x_data = x.data_f32();
270 let n_samples = x.dims()[0];
271 let n_features = x.dims()[1];
272 let coef = self.coef_.as_ref().expect("Model not fitted");
273
274 let predictions: Vec<f32> = (0..n_samples)
275 .map(|i| {
276 let mut pred = self.intercept_;
277 for j in 0..n_features {
278 pred += coef[j] * x_data[i * n_features + j];
279 }
280 pred
281 })
282 .collect();
283
284 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
285 }
286
287 pub fn inlier_mask(&self) -> Option<&Vec<bool>> {
288 self.inlier_mask_.as_ref()
289 }
290}
291
292impl Default for RANSACRegressor {
293 fn default() -> Self { Self::new() }
294}
295
/// Theil-Sen style robust regression: per-feature median of pairwise slopes,
/// with a median-residual intercept. Note this treats each feature
/// independently, so it is an approximation for multivariate inputs.
pub struct TheilSenRegressor {
    /// Whether to estimate an intercept (median of residuals).
    pub fit_intercept: bool,
    /// Cap on the number of sample pairs considered per feature.
    pub max_subpopulation: usize,
    // Learned coefficients; `None` until `fit` has been called.
    coef_: Option<Vec<f32>>,
    // Learned intercept.
    intercept_: f32,
}
303
304impl TheilSenRegressor {
305 pub fn new() -> Self {
306 TheilSenRegressor {
307 fit_intercept: true,
308 max_subpopulation: 10000,
309 coef_: None,
310 intercept_: 0.0,
311 }
312 }
313
314 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
315 let x_data = x.data_f32();
316 let y_data = y.data_f32();
317 let n_samples = x.dims()[0];
318 let n_features = x.dims()[1];
319
320 let mut coef = vec![0.0f32; n_features];
321
322 for j in 0..n_features {
324 let mut slopes = Vec::new();
325 let max_pairs = self.max_subpopulation.min(n_samples * (n_samples - 1) / 2);
326 let mut count = 0;
327
328 'outer: for i1 in 0..n_samples {
329 for i2 in (i1 + 1)..n_samples {
330 let dx = x_data[i2 * n_features + j] - x_data[i1 * n_features + j];
331 if dx.abs() > 1e-10 {
332 let dy = y_data[i2] - y_data[i1];
333 slopes.push(dy / dx);
334 count += 1;
335 if count >= max_pairs {
336 break 'outer;
337 }
338 }
339 }
340 }
341
342 if !slopes.is_empty() {
343 slopes.sort_by(|a, b| a.partial_cmp(b).unwrap());
344 coef[j] = slopes[slopes.len() / 2];
345 }
346 }
347
348 self.intercept_ = if self.fit_intercept {
350 let mut intercepts: Vec<f32> = (0..n_samples)
351 .map(|i| {
352 let mut pred = 0.0f32;
353 for j in 0..n_features {
354 pred += coef[j] * x_data[i * n_features + j];
355 }
356 y_data[i] - pred
357 })
358 .collect();
359 intercepts.sort_by(|a, b| a.partial_cmp(b).unwrap());
360 intercepts[n_samples / 2]
361 } else {
362 0.0
363 };
364
365 self.coef_ = Some(coef);
366 }
367
368 pub fn predict(&self, x: &Tensor) -> Tensor {
369 let x_data = x.data_f32();
370 let n_samples = x.dims()[0];
371 let n_features = x.dims()[1];
372 let coef = self.coef_.as_ref().expect("Model not fitted");
373
374 let predictions: Vec<f32> = (0..n_samples)
375 .map(|i| {
376 let mut pred = self.intercept_;
377 for j in 0..n_features {
378 pred += coef[j] * x_data[i * n_features + j];
379 }
380 pred
381 })
382 .collect();
383
384 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
385 }
386}
387
388impl Default for TheilSenRegressor {
389 fn default() -> Self { Self::new() }
390}
391
/// Linear quantile regression trained by subgradient descent on the pinball
/// (quantile) loss with an L2 penalty.
pub struct QuantileRegressor {
    /// Target quantile in (0, 1); 0.5 fits the conditional median.
    pub quantile: f32,
    /// L2 regularization strength on the coefficients.
    pub alpha: f32,
    /// Maximum number of gradient-descent iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the largest parameter update.
    pub tol: f32,
    /// Whether to fit an intercept term.
    pub fit_intercept: bool,
    // Learned coefficients; `None` until `fit` has been called.
    coef_: Option<Vec<f32>>,
    // Learned intercept.
    intercept_: f32,
}
402
403impl QuantileRegressor {
404 pub fn new(quantile: f32) -> Self {
405 QuantileRegressor {
406 quantile,
407 alpha: 1.0,
408 max_iter: 1000,
409 tol: 1e-4,
410 fit_intercept: true,
411 coef_: None,
412 intercept_: 0.0,
413 }
414 }
415
416 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
417 let x_data = x.data_f32();
418 let y_data = y.data_f32();
419 let n_samples = x.dims()[0];
420 let n_features = x.dims()[1];
421
422 let mut coef = vec![0.0f32; n_features];
423 let mut intercept = 0.0f32;
424 let lr = 0.01f32;
425
426 for _ in 0..self.max_iter {
428 let mut grad_coef = vec![0.0f32; n_features];
429 let mut grad_intercept = 0.0f32;
430
431 for i in 0..n_samples {
432 let mut pred = intercept;
433 for j in 0..n_features {
434 pred += coef[j] * x_data[i * n_features + j];
435 }
436 let residual = y_data[i] - pred;
437
438 let weight = if residual >= 0.0 { self.quantile } else { self.quantile - 1.0 };
440
441 grad_intercept -= weight;
442 for j in 0..n_features {
443 grad_coef[j] -= weight * x_data[i * n_features + j];
444 }
445 }
446
447 for j in 0..n_features {
449 grad_coef[j] = grad_coef[j] / n_samples as f32 + self.alpha * coef[j];
450 coef[j] -= lr * grad_coef[j];
451 }
452 if self.fit_intercept {
453 intercept -= lr * grad_intercept / n_samples as f32;
454 }
455 }
456
457 self.coef_ = Some(coef);
458 self.intercept_ = intercept;
459 }
460
461 pub fn predict(&self, x: &Tensor) -> Tensor {
462 let x_data = x.data_f32();
463 let n_samples = x.dims()[0];
464 let n_features = x.dims()[1];
465 let coef = self.coef_.as_ref().expect("Model not fitted");
466
467 let predictions: Vec<f32> = (0..n_samples)
468 .map(|i| {
469 let mut pred = self.intercept_;
470 for j in 0..n_features {
471 pred += coef[j] * x_data[i * n_features + j];
472 }
473 pred
474 })
475 .collect();
476
477 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
478 }
479}
480
/// Online Passive-Aggressive regression with an epsilon-insensitive loss:
/// each sample triggers an update only when its loss exceeds `epsilon`.
pub struct PassiveAggressiveRegressor {
    /// Aggressiveness cap: the per-sample step size `tau` is clipped to `c`.
    pub c: f32,
    /// Maximum number of passes over the data.
    pub max_iter: usize,
    /// Convergence tolerance on the largest coefficient update in a pass.
    pub tol: f32,
    /// Width of the epsilon-insensitive band around the prediction.
    pub epsilon: f32,
    /// Whether to update an intercept term.
    pub fit_intercept: bool,
    // Learned coefficients; `None` until `fit` has been called.
    coef_: Option<Vec<f32>>,
    // Learned intercept.
    intercept_: f32,
}
491
impl PassiveAggressiveRegressor {
    /// Creates a regressor with c = 1.0, epsilon = 0.1 and up to 1000 passes.
    pub fn new() -> Self {
        PassiveAggressiveRegressor {
            c: 1.0,
            max_iter: 1000,
            tol: 1e-3,
            epsilon: 0.1,
            fit_intercept: true,
            coef_: None,
            intercept_: 0.0,
        }
    }

    /// Fits with the Passive-Aggressive update rule: for each sample whose
    /// epsilon-insensitive loss is positive, step the weights toward the
    /// target by `tau = min(loss / ||x||^2, c)` in the direction of the error.
    /// Samples are visited in order each pass (no shuffling), and iteration
    /// stops when the largest update in a pass drops below `tol`.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut coef = vec![0.0f32; n_features];
        let mut intercept = 0.0f32;

        for _ in 0..self.max_iter {
            let mut max_update = 0.0f32;

            for i in 0..n_samples {
                let mut pred = intercept;
                for j in 0..n_features {
                    pred += coef[j] * x_data[i * n_features + j];
                }

                // Epsilon-insensitive loss: only errors beyond the band count.
                let loss = (y_data[i] - pred).abs() - self.epsilon;
                if loss > 0.0 {
                    // Squared norm starts at 1.0, apparently folding the
                    // implicit bias feature into ||x||^2 (this also keeps
                    // `tau` finite for an all-zero row) — NOTE(review):
                    // it is included even when `fit_intercept` is false.
                    let mut x_norm_sq = 1.0f32;
                    for j in 0..n_features {
                        x_norm_sq += x_data[i * n_features + j].powi(2);
                    }

                    let sign = if y_data[i] > pred { 1.0 } else { -1.0 };
                    // Step size: large enough to fix the error, capped at c.
                    let tau = (loss / x_norm_sq).min(self.c);

                    for j in 0..n_features {
                        let update = tau * sign * x_data[i * n_features + j];
                        coef[j] += update;
                        max_update = max_update.max(update.abs());
                    }
                    if self.fit_intercept {
                        intercept += tau * sign;
                    }
                }
            }

            // A full pass with only tiny updates means we have converged.
            if max_update < self.tol {
                break;
            }
        }

        self.coef_ = Some(coef);
        self.intercept_ = intercept;
    }

    /// Predicts targets for `x` (`[n_samples, n_features]`).
    ///
    /// # Panics
    /// Panics if called before `fit`.
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];
        let coef = self.coef_.as_ref().expect("Model not fitted");

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let mut pred = self.intercept_;
                for j in 0..n_features {
                    pred += coef[j] * x_data[i * n_features + j];
                }
                pred
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }
}
572
573impl Default for PassiveAggressiveRegressor {
574 fn default() -> Self { Self::new() }
575}
576
/// Solves `a * x = b` for a symmetric positive-(semi)definite `n x n` matrix
/// `a` (row-major) via Cholesky factorization with forward/back substitution.
/// A tiny ridge is added to the diagonal and pivots are floored at 1e-10 so
/// degenerate systems return a finite (least-effort) solution instead of NaN.
fn solve_linear_system(a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
    // Work on a jittered copy so the factorization stays stable.
    let mut mat = a.to_vec();
    for d in 0..n {
        mat[d * n + d] += 1e-10;
    }

    // Cholesky factorization: mat = L * L^T with L lower-triangular.
    let mut lower = vec![0.0f32; n * n];
    for row in 0..n {
        for col in 0..=row {
            if row == col {
                // Diagonal entry: sqrt of the remaining diagonal mass.
                let acc: f32 = (0..col).map(|k| lower[col * n + k] * lower[col * n + k]).sum();
                lower[col * n + col] = (mat[col * n + col] - acc).max(1e-10).sqrt();
            } else {
                // Off-diagonal entry of the current row.
                let acc: f32 = (0..col).map(|k| lower[row * n + k] * lower[col * n + k]).sum();
                lower[row * n + col] = (mat[row * n + col] - acc) / lower[col * n + col].max(1e-10);
            }
        }
    }

    // Forward substitution: L * fwd = b.
    let mut fwd = vec![0.0f32; n];
    for row in 0..n {
        let mut rem = b[row];
        for col in 0..row {
            rem -= lower[row * n + col] * fwd[col];
        }
        fwd[row] = rem / lower[row * n + row].max(1e-10);
    }

    // Back substitution: L^T * x = fwd.
    let mut solution = vec![0.0f32; n];
    for row in (0..n).rev() {
        let mut rem = fwd[row];
        for col in (row + 1)..n {
            rem -= lower[col * n + row] * solution[col];
        }
        solution[row] = rem / lower[row * n + row].max(1e-10);
    }
    solution
}
622
623fn fit_ols(x: &[f32], y: &[f32], n_samples: usize, n_features: usize) -> (Vec<f32>, f32) {
624 let x_mean: Vec<f32> = (0..n_features)
625 .map(|j| (0..n_samples).map(|i| x[i * n_features + j]).sum::<f32>() / n_samples as f32)
626 .collect();
627 let y_mean = y.iter().sum::<f32>() / n_samples as f32;
628
629 let mut xtx = vec![0.0f32; n_features * n_features];
630 let mut xty = vec![0.0f32; n_features];
631
632 for i in 0..n_features {
633 for k in 0..n_samples {
634 let xki = x[k * n_features + i] - x_mean[i];
635 xty[i] += xki * (y[k] - y_mean);
636 }
637 for j in 0..n_features {
638 for k in 0..n_samples {
639 xtx[i * n_features + j] +=
640 (x[k * n_features + i] - x_mean[i]) * (x[k * n_features + j] - x_mean[j]);
641 }
642 }
643 xtx[i * n_features + i] += 1e-10;
644 }
645
646 let coef = solve_linear_system(&xtx, &xty, n_features);
647 let intercept = y_mean - coef.iter().zip(x_mean.iter()).map(|(&c, &m)| c * m).sum::<f32>();
648 (coef, intercept)
649}
650
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: Huber fit/predict preserves the expected output shape.
    #[test]
    fn test_huber() {
        let features = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &[3, 2]).unwrap();
        let targets = Tensor::from_slice(&[1.0f32, 2.0, 3.0], &[3]).unwrap();
        let mut model = HuberRegressor::new();
        model.fit(&features, &targets);
        assert_eq!(model.predict(&features).dims(), &[3]);
    }

    // Smoke test: RANSAC fit/predict preserves the expected output shape.
    #[test]
    fn test_ransac() {
        let features = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &[3, 2]).unwrap();
        let targets = Tensor::from_slice(&[1.0f32, 2.0, 3.0], &[3]).unwrap();
        let mut model = RANSACRegressor::new();
        model.fit(&features, &targets);
        assert_eq!(model.predict(&features).dims(), &[3]);
    }
}
675
676