use rayon::prelude::*;
use crate::{
Booster,
WeakLearner,
State,
Classifier,
CombinedHypothesis,
Sample,
common::utils,
research::Research,
};
/// State of the AdaBoost boosting algorithm.
///
/// `F` is the hypothesis type returned by the weak learner.
pub struct AdaBoost<'a, F> {
// Borrowed training sample; its `shape().0` is used as the number of examples.
sample: &'a Sample,
// Current distribution over the examples. Starts uniform and is re-weighted
// after every boosting round; it is what the weak learner is trained against.
dist: Vec<f64>,
// Tolerance parameter; `max_loop` derives the iteration bound from it.
// Defaults to `1 / (n_sample + 1)` in `init`.
tolerance: f64,
// Weight assigned to each collected hypothesis (same order as `hypotheses`).
weights: Vec<f64>,
// Hypotheses collected so far, one per completed boosting round.
hypotheses: Vec<F>,
// Largest iteration index `boost` will still execute; set from `max_loop`
// in `preprocess`, `usize::MAX` until then.
max_iter: usize,
// Iteration at which `boost` stopped early because `|edge| >= 1`;
// `usize::MAX` if that has not happened.
terminated: usize,
}
impl<'a, F> AdaBoost<'a, F> {
    /// Creates a booster for `sample`.
    ///
    /// The distribution over examples starts uniform (`1 / n_sample` each,
    /// where `n_sample` is `sample.shape().0`), the tolerance defaults to
    /// `1 / (n_sample + 1)`, and no hypotheses have been collected yet.
    pub fn init(sample: &'a Sample) -> Self {
        let n_sample = sample.shape().0;

        let uni = 1.0 / n_sample as f64;
        AdaBoost {
            sample,

            dist: vec![uni; n_sample],
            tolerance: 1.0 / (n_sample as f64 + 1.0),

            weights: Vec::new(),
            hypotheses: Vec::new(),

            max_iter: usize::MAX,

            terminated: usize::MAX,
        }
    }

    /// Returns the iteration bound `ln(n_sample) / tolerance^2`.
    ///
    /// The value is converted with a float-to-integer `as` cast, so the
    /// fractional part is dropped and out-of-range values saturate.
    pub fn max_loop(&self) -> usize {
        let n_sample = self.sample.shape().0 as f64;

        (n_sample.ln() / self.tolerance.powi(2)) as usize
    }

    /// Sets the tolerance parameter and returns the booster (builder style).
    pub fn tolerance(mut self, tolerance: f64) -> Self {
        self.tolerance = tolerance;

        self
    }

    /// Re-weights the distribution over examples and returns the weight of
    /// the hypothesis that produced `margins`.
    ///
    /// * `margins` - one margin per example for the current hypothesis.
    /// * `edge` - inner product of `margins` and the current distribution.
    ///
    /// The hypothesis weight is `ln((1 + edge) / (1 - edge)) / 2`. The new
    /// distribution is proportional to `dist[i] * exp(-weight * margins[i])`;
    /// it is computed in log-space and normalized with a log-sum-exp.
    ///
    /// # Panics
    ///
    /// Panics if a log-weight is NaN (e.g. `edge` or a margin is NaN).
    #[inline]
    fn update_params(
        &mut self,
        margins: Vec<f64>,
        edge: f64
    ) -> f64
    {
        let n_sample = self.sample.shape().0;

        // Weight on the new hypothesis.
        let weight = ((1.0 + edge) / (1.0 - edge)).ln() / 2.0;

        // Unnormalized update, kept as logarithms to avoid overflow.
        // An entry that is exactly zero becomes `-inf` here.
        self.dist.par_iter_mut()
            .zip(margins)
            .for_each(|(d, p)| *d = d.ln() - weight * p);

        // Accumulate the normalizer from the smallest log-weight to the
        // largest so that small terms are not swallowed by large ones.
        let mut indices = (0..n_sample).into_par_iter()
            .collect::<Vec<usize>>();
        indices.sort_unstable_by(|&i, &j| {
            self.dist[i].partial_cmp(&self.dist[j])
                .expect("log-weights of the distribution must not be NaN")
        });

        let mut sorted = indices.into_iter();
        let mut normalizer = match sorted.next() {
            Some(i) => self.dist[i],
            // Empty sample: there is nothing to normalize.
            None => return weight,
        };
        for i in sorted {
            let mut a = normalizer;
            let mut b = self.dist[i];
            if a < b {
                std::mem::swap(&mut a, &mut b);
            }

            // ln(e^a + e^b) = a + ln(1 + e^(b - a)) with a >= b.
            // If `b` is `-inf` its term is exactly zero. Evaluating the
            // formula anyway would compute `-inf - (-inf) = NaN` when `a` is
            // `-inf` as well, which would poison the whole distribution.
            normalizer = if b == f64::NEG_INFINITY {
                a
            } else {
                a + (1.0 + (b - a).exp()).ln()
            };
        }

        // Leave log-space and normalize in a single step.
        self.dist.par_iter_mut()
            .for_each(|d| *d = (*d - normalizer).exp());

        weight
    }
}
impl<F> Booster<F> for AdaBoost<'_, F>
where
    F: Classifier + Clone,
{
    /// Puts the booster back into its starting state: uniform distribution,
    /// no collected hypotheses, and `max_iter` recomputed via `max_loop`.
    fn preprocess<W>(&mut self, _weak_learner: &W)
    where
        W: WeakLearner<Hypothesis = F>,
    {
        self.max_iter = self.max_loop();

        self.weights.clear();
        self.hypotheses.clear();

        let size = self.sample.shape().0;
        self.dist = vec![(size as f64).recip(); size];
    }

    /// Runs one boosting round.
    ///
    /// Terminates immediately once `iteration` exceeds `max_iter`.
    /// Otherwise a hypothesis is requested from `weak_learner` for the
    /// current distribution and its edge (inner product of its margins with
    /// the distribution) is computed. If `|edge| >= 1` the ensemble is
    /// replaced by that single hypothesis, weighted by the sign of the edge,
    /// and boosting terminates. In every other case the distribution is
    /// updated and the hypothesis is appended with its weight.
    fn boost<W>(&mut self, weak_learner: &W, iteration: usize) -> State
    where
        W: WeakLearner<Hypothesis = F>,
    {
        if iteration > self.max_iter {
            return State::Terminate;
        }

        let hypothesis = weak_learner.produce(self.sample, &self.dist);
        let margins = utils::margins_of_hypothesis(self.sample, &hypothesis);
        let edge = utils::inner_product(&margins, &self.dist);

        // Presumably `|edge| >= 1` means the hypothesis (or its negation)
        // is correct on every example -- confirm against `utils`.
        let saturated = edge.abs() >= 1.0;
        if !saturated {
            let weight = self.update_params(margins, edge);
            self.weights.push(weight);
            self.hypotheses.push(hypothesis);
            return State::Continue;
        }

        self.terminated = iteration;
        self.weights = vec![edge.signum()];
        self.hypotheses = vec![hypothesis];
        State::Terminate
    }

    /// Builds the combined hypothesis from the collected weights and
    /// hypotheses.
    fn postprocess<W>(&mut self, _weak_learner: &W) -> CombinedHypothesis<F>
    where
        W: WeakLearner<Hypothesis = F>,
    {
        let weights = self.weights.as_slice();
        let hypotheses = self.hypotheses.as_slice();
        CombinedHypothesis::from_slices(weights, hypotheses)
    }
}
impl<H> Research<H> for AdaBoost<'_, H>
where
    H: Classifier + Clone,
{
    /// Returns the combined hypothesis built from the weights and hypotheses
    /// collected so far.
    fn current_hypothesis(&self) -> CombinedHypothesis<H> {
        let weights = self.weights.as_slice();
        let hypotheses = self.hypotheses.as_slice();
        CombinedHypothesis::from_slices(weights, hypotheses)
    }
}