use rayon::prelude::*;
use crate::{
Booster,
WeakLearner,
Classifier,
WeightedMajority,
Sample,
common::utils,
research::Research,
};
use std::ops::ControlFlow;
/// State of a MadaBoost run over a borrowed training sample.
///
/// `F` is the hypothesis type handed back by the weak learner.
pub struct MadaBoost<'a, F> {
/// Training sample the booster reads from; borrowed for `'a`.
sample: &'a Sample,
/// One value per training example. Filled with `1 / n_sample` in
/// `preprocess`, shifted every round by `update_params`, and turned into
/// a distribution over examples by `beta2distribution`.
betas: Vec<f64>,
/// Tolerance used by `max_loop` to derive the iteration bound.
/// `init` sets it to `1 / (n_sample + 1)`.
tolerance: f64,
/// Coefficient of each collected hypothesis, in the order they were added.
alphas: Vec<f64>,
/// Hypotheses returned by the weak learner, aligned with `alphas`.
hypotheses: Vec<F>,
/// Round limit checked at the start of `boost`. `preprocess` sets it to
/// `max_loop()` or to `force_quit_at` when that is given.
max_iter: usize,
/// Optional user-supplied round limit that overrides `max_loop()`.
force_quit_at: Option<usize>,
/// Round at which `boost` stopped because the edge reached magnitude 1.
/// Starts as `usize::MAX`.
terminated: usize,
}
impl<'a, F> MadaBoost<'a, F> {
    /// Creates a booster bound to `sample`.
    ///
    /// The tolerance starts at `1 / (n_sample + 1)`, where `n_sample` is the
    /// first component of `sample.shape()`. No forced stop round is set and
    /// all per-run state is empty until `preprocess` is called.
    #[inline]
    pub fn init(sample: &'a Sample) -> Self {
        let (n_sample, _) = sample.shape();
        Self {
            sample,
            betas: Vec::new(),
            tolerance: 1.0 / (n_sample as f64 + 1.0),
            alphas: Vec::new(),
            hypotheses: Vec::new(),
            max_iter: usize::MAX,
            force_quit_at: None,
            terminated: usize::MAX,
        }
    }

    /// Returns the iteration bound `ceil((n_sample - 1) / tolerance^2)`.
    ///
    /// The float-to-`usize` cast saturates, so a zero tolerance yields
    /// `usize::MAX` rather than overflowing.
    pub fn max_loop(&self) -> usize {
        let (n_sample, _) = self.sample.shape();
        let bound = (n_sample as f64 - 1.0) / self.tolerance.powi(2);
        bound.ceil() as usize
    }

    /// Builder-style setter: stop boosting after round `it`, overriding the
    /// bound computed by `max_loop`.
    pub fn force_quit_at(mut self, it: usize) -> Self {
        self.force_quit_at = Some(it);
        self
    }

    /// Builder-style setter for the tolerance used by `max_loop`.
    pub fn tolerance(mut self, tolerance: f64) -> Self {
        self.tolerance = tolerance;
        self
    }

    /// Updates `betas` for the hypothesis just obtained and returns its
    /// coefficient.
    ///
    /// * `margins` - one margin per training example for the new hypothesis.
    /// * `edge` - edge of the new hypothesis under the current distribution.
    ///
    /// # Panics
    ///
    /// Panics if `eps = (1 - edge) / 2` falls outside `[0, 1)`.
    #[inline]
    fn update_params(&mut self, margins: Vec<f64>, edge: f64) -> f64 {
        let eps = 0.5 * (1.0 - edge);
        assert!((0f64..1f64).contains(&eps), "EPS: {}", eps);

        let eps2 = (0.5 * eps).sqrt();
        let beta = (eps2 / (1.0 - eps2)).sqrt();
        let alpha = (1.0 / beta).ln();

        // NOTE(review): the per-example update adds `beta` itself, not
        // `ln(beta)`, while `beta2distribution` exponentiates the stored
        // values. Confirm this matches the intended MadaBoost update.
        self.betas
            .par_iter_mut()
            .zip(margins)
            .for_each(|(b, yh)| *b += yh * beta);

        alpha
    }

    /// Converts `betas` into a distribution over the training examples.
    ///
    /// Every entry is capped at `1`, then entry `i` becomes
    /// `exp(w_i - ln(sum_j exp(w_j)))`, so the result sums to one. The
    /// log-sum is accumulated pairwise from the smallest value upwards.
    ///
    /// Returns an empty vector when `betas` is empty (for instance before
    /// `preprocess` has run) instead of panicking on an out-of-range index.
    fn beta2distribution(&self) -> Vec<f64> {
        // `f64::min` returns the non-NaN operand, so no capped value is NaN.
        let weights: Vec<f64> = self.betas.iter().map(|&b| b.min(1.0)).collect();

        // Sort the values themselves; the length comes from `betas`, not from
        // the sample, so the two can never disagree here.
        let mut ascending = weights.clone();
        ascending.sort_unstable_by(|x, y| {
            x.partial_cmp(y).expect("capped weights are never NaN")
        });

        let mut rest = ascending.into_iter();
        let mut normalizer = match rest.next() {
            Some(smallest) => smallest,
            None => return Vec::new(),
        };

        for w in rest {
            // ln(e^hi + e^lo) = hi + ln(1 + e^(lo - hi)) with hi >= lo keeps
            // the exponent non-positive.
            let (hi, lo) = if normalizer < w {
                (w, normalizer)
            } else {
                (normalizer, w)
            };
            normalizer = hi + (1.0 + (lo - hi).exp()).ln();
        }

        weights
            .into_iter()
            .map(|w| (w - normalizer).exp())
            .collect()
    }
}
impl<F> Booster<F> for MadaBoost<'_, F>
    where F: Classifier + Clone,
{
    type Output = WeightedMajority<F>;

    /// Name of this boosting algorithm.
    fn name(&self) -> &str {
        "MadaBoost"
    }

    /// Returns labelled settings describing this run: sample shape,
    /// tolerance, iteration bound from `max_loop`, and the forced stop round
    /// (`"-"` when none was requested).
    fn info(&self) -> Option<Vec<(&str, String)>> {
        let (n_sample, n_feature) = self.sample.shape();
        let quit = match self.force_quit_at {
            Some(it) => format!("At round {it}"),
            None => String::from("-"),
        };

        Some(vec![
            ("# of examples", n_sample.to_string()),
            ("# of features", n_feature.to_string()),
            ("Tolerance", self.tolerance.to_string()),
            ("Max iteration", self.max_loop().to_string()),
            ("Force quit", quit),
        ])
    }

    /// Resets all per-run state so the booster can be run (again).
    ///
    /// `betas` is refilled with `1 / n_sample`, the collected coefficients
    /// and hypotheses are dropped, and the round limit becomes
    /// `force_quit_at` if set, otherwise `max_loop()`. `terminated` goes back
    /// to the `usize::MAX` sentinel used by `init`, so an early stop recorded
    /// by a previous run does not leak into this one.
    fn preprocess<W>(
        &mut self,
        _weak_learner: &W,
    )
        where W: WeakLearner<Hypothesis = F>
    {
        self.sample.is_valid_binary_instance();

        let n_sample = self.sample.shape().0;
        let uni = 1.0 / n_sample as f64;
        self.betas = vec![uni; n_sample];

        self.alphas = Vec::new();
        self.hypotheses = Vec::new();

        self.max_iter = self.force_quit_at.unwrap_or_else(|| self.max_loop());
        self.terminated = usize::MAX;
    }

    /// Runs one boosting round.
    ///
    /// Returns `Break(max_iter)` when `iteration` exceeds the round limit.
    /// Otherwise it asks the weak learner for a hypothesis under the current
    /// distribution and measures its edge. If the edge has magnitude at
    /// least `1`, the ensemble is replaced by that single hypothesis with
    /// coefficient `signum(edge)` and `Break(iteration)` is returned.
    /// Otherwise the hypothesis and its coefficient are appended and
    /// boosting continues.
    fn boost<W>(
        &mut self,
        weak_learner: &W,
        iteration: usize,
    ) -> ControlFlow<usize>
        where W: WeakLearner<Hypothesis = F>,
    {
        if iteration > self.max_iter {
            return ControlFlow::Break(self.max_iter);
        }

        let dist = self.beta2distribution();
        let h = weak_learner.produce(self.sample, &dist[..]);

        let margins = utils::margins_of_hypothesis(self.sample, &h);
        let edge = utils::inner_product(&margins, &dist[..]);

        // An edge of magnitude 1 means this hypothesis (or its negation)
        // alone suffices, so everything collected so far is discarded.
        if edge.abs() >= 1.0 {
            self.terminated = iteration;
            self.alphas = vec![edge.signum()];
            self.hypotheses = vec![h];
            return ControlFlow::Break(iteration);
        }

        let alpha = self.update_params(margins, edge);
        self.alphas.push(alpha);
        self.hypotheses.push(h);

        ControlFlow::Continue(())
    }

    /// Builds the final weighted-majority classifier from the collected
    /// coefficients and hypotheses.
    fn postprocess<W>(
        &mut self,
        _weak_learner: &W,
    ) -> Self::Output
        where W: WeakLearner<Hypothesis = F>
    {
        WeightedMajority::from_slices(&self.alphas[..], &self.hypotheses[..])
    }
}
impl<H> Research for MadaBoost<'_, H>
    where H: Classifier + Clone,
{
    type Output = WeightedMajority<H>;

    /// Snapshot of the combined classifier built from the coefficients and
    /// hypotheses collected so far.
    fn current_hypothesis(&self) -> Self::Output {
        let coefficients = self.alphas.as_slice();
        let members = self.hypotheses.as_slice();
        WeightedMajority::from_slices(coefficients, members)
    }
}