1use ghostflow_core::Tensor;
4
/// Common interface for trainable models operating on [`Tensor`] data.
///
/// `Send + Sync` so trait objects can be shared across threads.
pub trait BaseEstimator: Send + Sync {
    /// Fits the estimator on features `x` and targets `y`.
    fn fit(&mut self, x: &Tensor, y: &Tensor);
    /// Predicts targets for `x`; callers must fit first.
    fn predict(&self, x: &Tensor) -> Tensor;
    /// Object-safe cloning: returns a boxed copy of this estimator.
    fn clone_box(&self) -> Box<dyn BaseEstimator>;
}
11
/// Ridge-regression meta-learner used as the final estimator in stacking.
///
/// Cloning copies the fitted state (if any).
#[derive(Clone)]
pub struct LinearMeta {
    // Learned coefficients, one per feature; `None` until `fit` is called.
    coef_: Option<Vec<f32>>,
    // Learned bias term; 0.0 until fitted.
    intercept_: f32,
}
18
19impl LinearMeta {
20 pub fn new() -> Self {
21 LinearMeta {
22 coef_: None,
23 intercept_: 0.0,
24 }
25 }
26
27 pub fn fit(&mut self, x: &[f32], y: &[f32], n_samples: usize, n_features: usize) {
28 let alpha = 1.0f32;
30
31 let x_mean: Vec<f32> = (0..n_features)
32 .map(|j| (0..n_samples).map(|i| x[i * n_features + j]).sum::<f32>() / n_samples as f32)
33 .collect();
34 let y_mean = y.iter().sum::<f32>() / n_samples as f32;
35
36 let mut xtx = vec![0.0f32; n_features * n_features];
37 let mut xty = vec![0.0f32; n_features];
38
39 for i in 0..n_features {
40 for k in 0..n_samples {
41 let xki = x[k * n_features + i] - x_mean[i];
42 xty[i] += xki * (y[k] - y_mean);
43 }
44 for j in 0..n_features {
45 for k in 0..n_samples {
46 xtx[i * n_features + j] +=
47 (x[k * n_features + i] - x_mean[i]) * (x[k * n_features + j] - x_mean[j]);
48 }
49 }
50 xtx[i * n_features + i] += alpha;
51 }
52
53 let coef = solve_linear(&xtx, &xty, n_features);
54 let intercept = y_mean - coef.iter().zip(x_mean.iter()).map(|(&c, &m)| c * m).sum::<f32>();
55
56 self.coef_ = Some(coef);
57 self.intercept_ = intercept;
58 }
59
60 pub fn predict(&self, x: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
61 let coef = self.coef_.as_ref().expect("Not fitted");
62 (0..n_samples)
63 .map(|i| {
64 let mut pred = self.intercept_;
65 for j in 0..n_features {
66 pred += coef[j] * x[i * n_features + j];
67 }
68 pred
69 })
70 .collect()
71 }
72}
73
74impl Default for LinearMeta {
75 fn default() -> Self { Self::new() }
76}
77
/// Binary logistic-regression meta-learner used as the final estimator in
/// stacking classification.
#[derive(Clone)]
pub struct LogisticMeta {
    // Learned coefficients; `None` until `fit` is called.
    coef_: Option<Vec<f32>>,
    // Learned bias term.
    intercept_: f32,
    // Number of full-batch gradient-descent passes performed by `fit`.
    max_iter: usize,
}
85
86impl LogisticMeta {
87 pub fn new() -> Self {
88 LogisticMeta {
89 coef_: None,
90 intercept_: 0.0,
91 max_iter: 100,
92 }
93 }
94
95 fn sigmoid(x: f32) -> f32 {
96 1.0 / (1.0 + (-x).exp())
97 }
98
99 pub fn fit(&mut self, x: &[f32], y: &[f32], n_samples: usize, n_features: usize) {
100 let mut coef = vec![0.0f32; n_features];
101 let mut intercept = 0.0f32;
102 let lr = 0.1f32;
103 let alpha = 0.01f32;
104
105 for _ in 0..self.max_iter {
106 let mut grad_coef = vec![0.0f32; n_features];
107 let mut grad_intercept = 0.0f32;
108
109 for i in 0..n_samples {
110 let mut z = intercept;
111 for j in 0..n_features {
112 z += coef[j] * x[i * n_features + j];
113 }
114 let pred = Self::sigmoid(z);
115 let error = pred - y[i];
116
117 grad_intercept += error;
118 for j in 0..n_features {
119 grad_coef[j] += error * x[i * n_features + j];
120 }
121 }
122
123 intercept -= lr * grad_intercept / n_samples as f32;
124 for j in 0..n_features {
125 coef[j] -= lr * (grad_coef[j] / n_samples as f32 + alpha * coef[j]);
126 }
127 }
128
129 self.coef_ = Some(coef);
130 self.intercept_ = intercept;
131 }
132
133 pub fn predict_proba(&self, x: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
134 let coef = self.coef_.as_ref().expect("Not fitted");
135 (0..n_samples)
136 .map(|i| {
137 let mut z = self.intercept_;
138 for j in 0..n_features {
139 z += coef[j] * x[i * n_features + j];
140 }
141 Self::sigmoid(z)
142 })
143 .collect()
144 }
145
146 pub fn predict(&self, x: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
147 self.predict_proba(x, n_samples, n_features)
148 .into_iter()
149 .map(|p| if p >= 0.5 { 1.0 } else { 0.0 })
150 .collect()
151 }
152}
153
154impl Default for LogisticMeta {
155 fn default() -> Self { Self::new() }
156}
157
/// Stacking ensemble for regression: out-of-fold predictions from a set of
/// base estimators become the training features of a ridge meta-model.
pub struct StackingRegressor {
    /// Number of cross-validation folds used to build meta-features.
    pub cv: usize,
    /// When true, the raw input features are appended to the meta-features.
    pub passthrough: bool,
    // Factories producing fresh base estimators for each fold / final fit.
    estimators_: Vec<Box<dyn Fn() -> Box<dyn StackingEstimator>>>,
    // Per-estimator fitted models (filled by `fit`, used by `predict`).
    fitted_estimators_: Vec<Vec<Box<dyn StackingEstimator>>>,
    // Ridge meta-model trained on the out-of-fold predictions.
    final_estimator_: LinearMeta,
    // Feature count recorded at fit time.
    n_features_: usize,
}
167
/// Base-estimator interface for [`StackingRegressor`].
///
/// A trait object (rather than a generic) so heterogeneous base models can
/// live in one ensemble; `Send` allows moving fitted models across threads.
pub trait StackingEstimator: Send {
    /// Fits the base estimator on `x` / `y`.
    fn fit(&mut self, x: &Tensor, y: &Tensor);
    /// Predicts targets for `x` after fitting.
    fn predict(&self, x: &Tensor) -> Tensor;
}
172
173impl StackingRegressor {
174 pub fn new() -> Self {
175 StackingRegressor {
176 cv: 5,
177 passthrough: false,
178 estimators_: Vec::new(),
179 fitted_estimators_: Vec::new(),
180 final_estimator_: LinearMeta::new(),
181 n_features_: 0,
182 }
183 }
184
185 pub fn cv(mut self, cv: usize) -> Self {
186 self.cv = cv;
187 self
188 }
189
190 pub fn passthrough(mut self, p: bool) -> Self {
191 self.passthrough = p;
192 self
193 }
194
195 pub fn add_estimator<F>(&mut self, factory: F)
196 where
197 F: Fn() -> Box<dyn StackingEstimator> + 'static
198 {
199 self.estimators_.push(Box::new(factory));
200 }
201
202 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
203 let x_data = x.data_f32();
204 let y_data = y.data_f32();
205 let n_samples = x.dims()[0];
206 let n_features = x.dims()[1];
207 let n_estimators = self.estimators_.len();
208
209 self.n_features_ = n_features;
210
211 if n_estimators == 0 {
212 panic!("No estimators added");
213 }
214
215 let fold_size = n_samples / self.cv;
217 let mut meta_features = vec![0.0f32; n_samples * n_estimators];
218
219 self.fitted_estimators_ = (0..n_estimators).map(|_| Vec::new()).collect();
220
221 for fold in 0..self.cv {
222 let val_start = fold * fold_size;
223 let val_end = if fold == self.cv - 1 { n_samples } else { (fold + 1) * fold_size };
224
225 let train_indices: Vec<usize> = (0..n_samples)
227 .filter(|&i| i < val_start || i >= val_end)
228 .collect();
229 let val_indices: Vec<usize> = (val_start..val_end).collect();
230
231 let x_train: Vec<f32> = train_indices.iter()
232 .flat_map(|&i| x_data[i * n_features..(i + 1) * n_features].to_vec())
233 .collect();
234 let y_train: Vec<f32> = train_indices.iter().map(|&i| y_data[i]).collect();
235
236 let x_val: Vec<f32> = val_indices.iter()
237 .flat_map(|&i| x_data[i * n_features..(i + 1) * n_features].to_vec())
238 .collect();
239
240 let x_train_tensor = Tensor::from_slice(&x_train, &[train_indices.len(), n_features]).unwrap();
241 let y_train_tensor = Tensor::from_slice(&y_train, &[train_indices.len()]).unwrap();
242 let x_val_tensor = Tensor::from_slice(&x_val, &[val_indices.len(), n_features]).unwrap();
243
244 for (est_idx, factory) in self.estimators_.iter().enumerate() {
246 let mut estimator = factory();
247 estimator.fit(&x_train_tensor, &y_train_tensor);
248
249 let predictions = estimator.predict(&x_val_tensor);
250 let pred_data = predictions.data_f32();
251
252 for (i, &val_idx) in val_indices.iter().enumerate() {
253 meta_features[val_idx * n_estimators + est_idx] = pred_data[i];
254 }
255
256 if fold == self.cv - 1 {
257 self.fitted_estimators_[est_idx].push(estimator);
258 }
259 }
260 }
261
262 for (est_idx, factory) in self.estimators_.iter().enumerate() {
264 let mut estimator = factory();
265 estimator.fit(x, y);
266 self.fitted_estimators_[est_idx] = vec![estimator];
267 }
268
269 let meta_n_features = if self.passthrough {
271 n_estimators + n_features
272 } else {
273 n_estimators
274 };
275
276 let final_meta: Vec<f32> = if self.passthrough {
277 (0..n_samples)
278 .flat_map(|i| {
279 let mut row = Vec::with_capacity(meta_n_features);
280 for j in 0..n_estimators {
281 row.push(meta_features[i * n_estimators + j]);
282 }
283 for j in 0..n_features {
284 row.push(x_data[i * n_features + j]);
285 }
286 row
287 })
288 .collect()
289 } else {
290 meta_features
291 };
292
293 self.final_estimator_.fit(&final_meta, &y_data, n_samples, meta_n_features);
294 }
295
296 pub fn predict(&self, x: &Tensor) -> Tensor {
297 let x_data = x.data_f32();
298 let n_samples = x.dims()[0];
299 let n_features = x.dims()[1];
300 let n_estimators = self.fitted_estimators_.len();
301
302 let mut meta_features = vec![0.0f32; n_samples * n_estimators];
304
305 for (est_idx, estimators) in self.fitted_estimators_.iter().enumerate() {
306 let predictions = estimators[0].predict(x);
307 let pred_data = predictions.data_f32();
308 for i in 0..n_samples {
309 meta_features[i * n_estimators + est_idx] = pred_data[i];
310 }
311 }
312
313 let meta_n_features = if self.passthrough {
315 n_estimators + n_features
316 } else {
317 n_estimators
318 };
319
320 let final_meta: Vec<f32> = if self.passthrough {
321 (0..n_samples)
322 .flat_map(|i| {
323 let mut row = Vec::with_capacity(meta_n_features);
324 for j in 0..n_estimators {
325 row.push(meta_features[i * n_estimators + j]);
326 }
327 for j in 0..n_features {
328 row.push(x_data[i * n_features + j]);
329 }
330 row
331 })
332 .collect()
333 } else {
334 meta_features
335 };
336
337 let predictions = self.final_estimator_.predict(&final_meta, n_samples, meta_n_features);
338 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
339 }
340}
341
342impl Default for StackingRegressor {
343 fn default() -> Self { Self::new() }
344}
345
/// Stacking ensemble for (binary) classification: out-of-fold predictions or
/// probabilities from base classifiers feed a logistic meta-model.
pub struct StackingClassifier {
    /// Number of cross-validation folds used to build meta-features.
    pub cv: usize,
    /// When true, the raw input features are appended to the meta-features.
    pub passthrough: bool,
    /// Whether base outputs are hard predictions or probabilities.
    pub stack_method: StackMethod,
    // Factories producing fresh base estimators for each fold / final fit.
    estimators_: Vec<Box<dyn Fn() -> Box<dyn StackingClassifierEstimator>>>,
    // Per-estimator fitted models (filled by `fit`, used by `predict`).
    fitted_estimators_: Vec<Vec<Box<dyn StackingClassifierEstimator>>>,
    // Logistic meta-model trained on the out-of-fold meta-features.
    final_estimator_: LogisticMeta,
    // Sorted distinct class labels observed in `fit`'s targets.
    classes_: Vec<i32>,
    // Feature count recorded at fit time.
    n_features_: usize,
}
357
/// How base-classifier outputs are turned into meta-features.
#[derive(Clone, Copy)]
pub enum StackMethod {
    /// Use hard class predictions.
    Predict,
    /// Use predicted probabilities when the estimator provides them;
    /// falls back to hard predictions otherwise.
    PredictProba,
}
363
/// Base-estimator interface for [`StackingClassifier`].
pub trait StackingClassifierEstimator: Send {
    /// Fits the base classifier on `x` / `y`.
    fn fit(&mut self, x: &Tensor, y: &Tensor);
    /// Predicts hard class labels for `x`.
    fn predict(&self, x: &Tensor) -> Tensor;
    /// Predicts class probabilities, or `None` if the model cannot produce
    /// them (callers fall back to `predict`).
    fn predict_proba(&self, x: &Tensor) -> Option<Tensor>;
}
369
370impl StackingClassifier {
371 pub fn new() -> Self {
372 StackingClassifier {
373 cv: 5,
374 passthrough: false,
375 stack_method: StackMethod::PredictProba,
376 estimators_: Vec::new(),
377 fitted_estimators_: Vec::new(),
378 final_estimator_: LogisticMeta::new(),
379 classes_: Vec::new(),
380 n_features_: 0,
381 }
382 }
383
384 pub fn cv(mut self, cv: usize) -> Self {
385 self.cv = cv;
386 self
387 }
388
389 pub fn stack_method(mut self, m: StackMethod) -> Self {
390 self.stack_method = m;
391 self
392 }
393
394 pub fn add_estimator<F>(&mut self, factory: F)
395 where
396 F: Fn() -> Box<dyn StackingClassifierEstimator> + 'static
397 {
398 self.estimators_.push(Box::new(factory));
399 }
400
401 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
402 let x_data = x.data_f32();
403 let y_data = y.data_f32();
404 let n_samples = x.dims()[0];
405 let n_features = x.dims()[1];
406 let n_estimators = self.estimators_.len();
407
408 self.n_features_ = n_features;
409
410 let mut classes: Vec<i32> = y_data.iter().map(|&v| v as i32).collect();
412 classes.sort();
413 classes.dedup();
414 self.classes_ = classes;
415
416 if n_estimators == 0 {
417 panic!("No estimators added");
418 }
419
420 let meta_cols_per_est = 1;
422 let fold_size = n_samples / self.cv;
423 let mut meta_features = vec![0.0f32; n_samples * n_estimators * meta_cols_per_est];
424
425 self.fitted_estimators_ = (0..n_estimators).map(|_| Vec::new()).collect();
426
427 for fold in 0..self.cv {
428 let val_start = fold * fold_size;
429 let val_end = if fold == self.cv - 1 { n_samples } else { (fold + 1) * fold_size };
430
431 let train_indices: Vec<usize> = (0..n_samples)
432 .filter(|&i| i < val_start || i >= val_end)
433 .collect();
434 let val_indices: Vec<usize> = (val_start..val_end).collect();
435
436 let x_train: Vec<f32> = train_indices.iter()
437 .flat_map(|&i| x_data[i * n_features..(i + 1) * n_features].to_vec())
438 .collect();
439 let y_train: Vec<f32> = train_indices.iter().map(|&i| y_data[i]).collect();
440
441 let x_val: Vec<f32> = val_indices.iter()
442 .flat_map(|&i| x_data[i * n_features..(i + 1) * n_features].to_vec())
443 .collect();
444
445 let x_train_tensor = Tensor::from_slice(&x_train, &[train_indices.len(), n_features]).unwrap();
446 let y_train_tensor = Tensor::from_slice(&y_train, &[train_indices.len()]).unwrap();
447 let x_val_tensor = Tensor::from_slice(&x_val, &[val_indices.len(), n_features]).unwrap();
448
449 for (est_idx, factory) in self.estimators_.iter().enumerate() {
450 let mut estimator = factory();
451 estimator.fit(&x_train_tensor, &y_train_tensor);
452
453 let pred_data = match self.stack_method {
454 StackMethod::PredictProba => {
455 if let Some(proba) = estimator.predict_proba(&x_val_tensor) {
456 proba.data_f32().clone()
457 } else {
458 estimator.predict(&x_val_tensor).data_f32().clone()
459 }
460 }
461 StackMethod::Predict => {
462 estimator.predict(&x_val_tensor).data_f32().clone()
463 }
464 };
465
466 for (i, &val_idx) in val_indices.iter().enumerate() {
467 meta_features[val_idx * n_estimators + est_idx] = pred_data[i];
468 }
469 }
470 }
471
472 for (est_idx, factory) in self.estimators_.iter().enumerate() {
474 let mut estimator = factory();
475 estimator.fit(x, y);
476 self.fitted_estimators_[est_idx] = vec![estimator];
477 }
478
479 let meta_n_features = if self.passthrough {
481 n_estimators + n_features
482 } else {
483 n_estimators
484 };
485
486 let final_meta: Vec<f32> = if self.passthrough {
487 (0..n_samples)
488 .flat_map(|i| {
489 let mut row = Vec::with_capacity(meta_n_features);
490 for j in 0..n_estimators {
491 row.push(meta_features[i * n_estimators + j]);
492 }
493 for j in 0..n_features {
494 row.push(x_data[i * n_features + j]);
495 }
496 row
497 })
498 .collect()
499 } else {
500 meta_features
501 };
502
503 self.final_estimator_.fit(&final_meta, &y_data, n_samples, meta_n_features);
504 }
505
506 pub fn predict(&self, x: &Tensor) -> Tensor {
507 let x_data = x.data_f32();
508 let n_samples = x.dims()[0];
509 let n_features = x.dims()[1];
510 let n_estimators = self.fitted_estimators_.len();
511
512 let mut meta_features = vec![0.0f32; n_samples * n_estimators];
513
514 for (est_idx, estimators) in self.fitted_estimators_.iter().enumerate() {
515 let pred_data = match self.stack_method {
516 StackMethod::PredictProba => {
517 if let Some(proba) = estimators[0].predict_proba(x) {
518 proba.data_f32().clone()
519 } else {
520 estimators[0].predict(x).data_f32().clone()
521 }
522 }
523 StackMethod::Predict => {
524 estimators[0].predict(x).data_f32().clone()
525 }
526 };
527
528 for i in 0..n_samples {
529 meta_features[i * n_estimators + est_idx] = pred_data[i];
530 }
531 }
532
533 let meta_n_features = if self.passthrough {
534 n_estimators + n_features
535 } else {
536 n_estimators
537 };
538
539 let final_meta: Vec<f32> = if self.passthrough {
540 (0..n_samples)
541 .flat_map(|i| {
542 let mut row = Vec::with_capacity(meta_n_features);
543 for j in 0..n_estimators {
544 row.push(meta_features[i * n_estimators + j]);
545 }
546 for j in 0..n_features {
547 row.push(x_data[i * n_features + j]);
548 }
549 row
550 })
551 .collect()
552 } else {
553 meta_features
554 };
555
556 let predictions = self.final_estimator_.predict(&final_meta, n_samples, meta_n_features);
557 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
558 }
559
560 pub fn predict_proba(&self, x: &Tensor) -> Tensor {
561 let x_data = x.data_f32();
562 let n_samples = x.dims()[0];
563 let n_features = x.dims()[1];
564 let n_estimators = self.fitted_estimators_.len();
565
566 let mut meta_features = vec![0.0f32; n_samples * n_estimators];
567
568 for (est_idx, estimators) in self.fitted_estimators_.iter().enumerate() {
569 let pred_data = match self.stack_method {
570 StackMethod::PredictProba => {
571 if let Some(proba) = estimators[0].predict_proba(x) {
572 proba.data_f32().clone()
573 } else {
574 estimators[0].predict(x).data_f32().clone()
575 }
576 }
577 StackMethod::Predict => {
578 estimators[0].predict(x).data_f32().clone()
579 }
580 };
581
582 for i in 0..n_samples {
583 meta_features[i * n_estimators + est_idx] = pred_data[i];
584 }
585 }
586
587 let meta_n_features = if self.passthrough {
588 n_estimators + n_features
589 } else {
590 n_estimators
591 };
592
593 let final_meta: Vec<f32> = if self.passthrough {
594 (0..n_samples)
595 .flat_map(|i| {
596 let mut row = Vec::with_capacity(meta_n_features);
597 for j in 0..n_estimators {
598 row.push(meta_features[i * n_estimators + j]);
599 }
600 for j in 0..n_features {
601 row.push(x_data[i * n_features + j]);
602 }
603 row
604 })
605 .collect()
606 } else {
607 meta_features
608 };
609
610 let proba = self.final_estimator_.predict_proba(&final_meta, n_samples, meta_n_features);
611
612 let result: Vec<f32> = proba.iter()
614 .flat_map(|&p| vec![1.0 - p, p])
615 .collect();
616
617 Tensor::from_slice(&result, &[n_samples, 2]).unwrap()
618 }
619}
620
621impl Default for StackingClassifier {
622 fn default() -> Self { Self::new() }
623}
624
/// Solves the dense linear system `A x = b` (row-major `n x n` matrix `a`)
/// by Gauss-Jordan elimination with partial pivoting.
///
/// Near-singular pivots (|pivot| < 1e-10) are skipped rather than reported,
/// so callers should regularize `A` (as the ridge fit in this file does).
fn solve_linear(a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
    let w = n + 1; // width of the augmented matrix [A | b]
    let mut aug = vec![0.0f32; n * w];
    for (row, (a_row, &rhs)) in a.chunks(n).zip(b).enumerate() {
        aug[row * w..row * w + n].copy_from_slice(a_row);
        aug[row * w + n] = rhs;
    }

    for col in 0..n {
        // Partial pivoting: bring the largest remaining |entry| in this
        // column onto the diagonal (first maximum wins, strict `>`).
        let mut pivot_row = col;
        for r in (col + 1)..n {
            if aug[r * w + col].abs() > aug[pivot_row * w + col].abs() {
                pivot_row = r;
            }
        }
        if pivot_row != col {
            for j in 0..w {
                aug.swap(col * w + j, pivot_row * w + j);
            }
        }

        let pivot = aug[col * w + col];
        if pivot.abs() < 1e-10 {
            continue; // singular direction: leave this row untouched
        }

        // Normalize the pivot row, then eliminate the column everywhere else.
        for j in col..w {
            aug[col * w + j] /= pivot;
        }
        for r in 0..n {
            if r == col {
                continue;
            }
            let factor = aug[r * w + col];
            for j in col..w {
                aug[r * w + j] -= factor * aug[col * w + j];
            }
        }
    }

    // The solution is the last column of the reduced augmented matrix.
    aug.chunks(w).map(|row| row[n]).collect()
}
665
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_linear_meta() {
        // 3 samples, 2 collinear features (f2 = f1 + 1); ridge handles that.
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let y = vec![1.0, 2.0, 3.0];

        let mut meta = LinearMeta::new();
        meta.fit(&x, &y, 3, 2);
        let pred = meta.predict(&x, 3, 2);

        assert_eq!(pred.len(), 3);
        // The middle sample sits exactly at the feature means, so its
        // prediction equals the target mean regardless of ridge shrinkage.
        assert!((pred[1] - 2.0).abs() < 1e-3);
        // Targets increase with the features, so predictions must too.
        assert!(pred[0] < pred[1] && pred[1] < pred[2]);
    }

    #[test]
    fn test_logistic_meta() {
        // 4 samples, 2 features, linearly separable labels.
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let y = vec![0.0, 0.0, 1.0, 1.0];

        let mut meta = LogisticMeta::new();
        meta.fit(&x, &y, 4, 2);
        let pred = meta.predict(&x, 4, 2);

        assert_eq!(pred.len(), 4);
        // The class extremes must be classified correctly.
        assert_eq!(pred[0], 0.0);
        assert_eq!(pred[3], 1.0);
        // Probabilities increase along the class gradient.
        let proba = meta.predict_proba(&x, 4, 2);
        assert!(proba[0] < proba[3]);
    }
}
694
695