use rayon::prelude::*;
use crate::{
common::loss_functions::*,
Sample,
Booster,
WeakLearner,
Regressor,
WeightedMajority
};
use std::ops::ControlFlow;
/// State of a Gradient Boosting Machine that combines regressors of type `F`.
///
/// The struct borrows the training sample and accumulates one coefficient and
/// one hypothesis per successful boosting round.
pub struct GBM<'a, F> {
/// Training sample borrowed for the lifetime `'a`; queried for its shape,
/// its targets and the per-example predictions of each hypothesis.
sample: &'a Sample,
/// Tolerance parameter set through the `tolerance` builder; read by
/// `max_loop` and reported by `info`.
tolerance: f64,
/// Coefficient chosen for each stored hypothesis, in round order.
weights: Vec<f64>,
/// Hypotheses returned by the weak learner, parallel to `weights`.
hypotheses: Vec<F>,
/// Loss function used to pick the coefficient of a new hypothesis.
loss: GBMLoss,
/// Upper bound on the iteration index accepted by `boost`.
max_iter: usize,
/// Round at which boosting stopped early; `usize::MAX` after `init` and
/// reset to `max_iter` by `preprocess`.
terminated: usize,
/// Per-example running sum of `coefficient * hypothesis prediction` over
/// the training sample; one entry per example.
predictions: Vec<f64>,
}
impl<'a, F> GBM<'a, F> {
    /// Creates a booster over `sample` with the default configuration.
    ///
    /// Defaults: tolerance `0.0`, loss `GBMLoss::L2`, at most `100`
    /// iterations, no stored hypotheses, and an all-zero prediction vector
    /// with one entry per example of `sample`.
    pub fn init(sample: &'a Sample) -> Self {
        // The first component of the shape is the number of examples.
        let (n_sample, _) = sample.shape();

        Self {
            sample,
            tolerance: 0.0,
            weights: Vec::new(),
            hypotheses: Vec::new(),
            loss: GBMLoss::L2,
            max_iter: 100,
            terminated: usize::MAX,
            predictions: vec![0.0; n_sample],
        }
    }
}
impl<'a, F> GBM<'a, F> {
    /// Returns `ln(n_sample) / tolerance^2`, converted to `usize`.
    ///
    /// The conversion uses an `as` cast, so the fractional part is dropped.
    /// A zero tolerance makes the quotient non-finite; the cast then
    /// saturates (or yields `0` for NaN) instead of panicking.
    pub fn max_loop(&self) -> usize {
        let (n_sample, _) = self.sample.shape();
        let log_n = (n_sample as f64).ln();
        let tolerance_sq = self.tolerance.powi(2);

        (log_n / tolerance_sq) as usize
    }

    /// Builder-style setter for the tolerance parameter.
    pub fn tolerance(self, tolerance: f64) -> Self {
        Self { tolerance, ..self }
    }

    /// Builder-style setter for the loss function.
    pub fn loss(self, loss_type: GBMLoss) -> Self {
        Self {
            loss: loss_type,
            ..self
        }
    }
}
impl<F> Booster<F> for GBM<'_, F>
where
    F: Regressor + Clone,
{
    type Output = WeightedMajority<F>;

    /// Human-readable name of the algorithm.
    fn name(&self) -> &str {
        "Gradient Boosting Machine"
    }

    /// Returns labelled configuration values: sample shape, tolerance,
    /// loss name and the iteration limit.
    fn info(&self) -> Option<Vec<(&str, String)>> {
        let (n_sample, n_feature) = self.sample.shape();

        Some(vec![
            ("# of examples", n_sample.to_string()),
            ("# of features", n_feature.to_string()),
            ("Tolerance", self.tolerance.to_string()),
            ("Loss", self.loss.name().to_string()),
            ("Max iteration", self.max_iter.to_string()),
        ])
    }

    /// Resets all per-run state so the booster can be run again.
    ///
    /// Clears the stored coefficients and hypotheses (reserving room for
    /// `max_iter` of each), sets `terminated` to `max_iter`, and zeroes the
    /// running predictions.
    fn preprocess<W>(&mut self, _weak_learner: &W)
    where
        W: WeakLearner<Hypothesis = F>,
    {
        let n_sample = self.sample.shape().0;

        self.weights = Vec::with_capacity(self.max_iter);
        self.hypotheses = Vec::with_capacity(self.max_iter);
        self.terminated = self.max_iter;
        self.predictions = vec![0.0; n_sample];
    }

    /// Runs one boosting round.
    ///
    /// Asks the weak learner for a hypothesis given the current running
    /// predictions, lets the loss choose its coefficient, stores both, and
    /// adds `coef * prediction` to every running prediction.
    ///
    /// Returns `ControlFlow::Break` when `iteration` exceeds `max_iter`, or
    /// when the chosen coefficient is zero or not finite. In the latter case
    /// the hypothesis is discarded and `terminated` records the round.
    ///
    /// NOTE(review): the guard below suggests `iteration` is 1-based; confirm
    /// against the caller that drives `boost`.
    fn boost<W>(&mut self, weak_learner: &W, iteration: usize) -> ControlFlow<usize>
    where
        W: WeakLearner<Hypothesis = F>,
    {
        if self.max_iter < iteration {
            return ControlFlow::Break(self.max_iter);
        }

        let h = weak_learner.produce(self.sample, &self.predictions[..]);
        let predictions = h.predict_all(self.sample);
        let coef = self
            .loss
            .best_coefficient(&self.sample.target(), &predictions[..]);

        // A zero coefficient means the new hypothesis contributes nothing.
        // A NaN or infinite coefficient would be multiplied into every
        // running prediction and corrupt all later rounds and the final
        // model, so it is treated as a stopping condition as well.
        if coef == 0.0 || !coef.is_finite() {
            self.terminated = iteration;
            return ControlFlow::Break(iteration);
        }

        self.weights.push(coef);
        self.hypotheses.push(h);

        self.predictions
            .par_iter_mut()
            .zip(predictions)
            .for_each(|(p, q)| {
                *p += coef * q;
            });

        ControlFlow::Continue(())
    }

    /// Builds the final combined model from the stored coefficients and
    /// hypotheses.
    fn postprocess<W>(&mut self, _weak_learner: &W) -> Self::Output
    where
        W: WeakLearner<Hypothesis = F>,
    {
        WeightedMajority::from_slices(&self.weights[..], &self.hypotheses[..])
    }
}