#[cfg(not(feature="gurobi"))]
use super::qp_model::QPModel;
#[cfg(feature="gurobi")]
use super::gurobi_qp_model::QPModel;
use crate::{
Sample,
Booster,
WeakLearner,
Classifier,
WeightedMajority,
common::utils,
common::checker,
research::Research,
};
use std::cell::RefCell;
use std::ops::ControlFlow;
/// State of the ERLPBoost booster.
///
/// `F` is the hypothesis type returned by the weak learner. The struct
/// borrows the training sample for `'a` and keeps the solver, the current
/// distribution over examples, and the hypotheses collected so far.
pub struct ERLPBoost<'a, F> {
// Training sample the booster runs on.
sample: &'a Sample,
// Current distribution over examples. Reset to uniform (`1 / n_sample`)
// in `preprocess` and replaced by the solver's distribution after every
// update.
dist: Vec<f64>,
// Running minimum of `edge + entropy / eta` over the hypotheses produced
// so far (see `update_gamma_hat_mut`).
gamma_hat: f64,
// `max_edge + entropy / eta` over the stored hypotheses for the current
// distribution (see `update_gamma_star_mut`).
gamma_star: f64,
// Regularization parameter: `max(0.5, ln(n_sample / nu) / half_tolerance)`.
eta: f64,
// Half of the user-facing tolerance; boosting stops once
// `gamma_hat - gamma_star <= half_tolerance`.
half_tolerance: f64,
// Solver for the distribution update. `None` until `init_solver` runs
// (called from `preprocess`).
qp_model: Option<RefCell<QPModel>>,
// Hypotheses accepted so far, in the order they were produced.
hypotheses: Vec<F>,
// Weights on `hypotheses`, fetched from the solver in `postprocess`.
weights: Vec<f64>,
// First component of `sample.shape()`, cached at construction.
n_sample: usize,
// Capping parameter; must satisfy `1 <= nu <= n_sample`. The solver is
// given `1 / nu` as the upper bound.
nu: f64,
// Iteration at which boosting stopped. `usize::MAX` after `init`,
// `max_iter` after `preprocess`, and the actual iteration once the
// stopping criterion fires.
terminated: usize,
// Iteration cap computed by `max_loop` in `preprocess`.
max_iter: usize,
}
impl<'a, F> ERLPBoost<'a, F> {
    /// Builds a booster over `sample` with default parameters.
    ///
    /// Defaults: half-tolerance `0.005` (i.e. tolerance `0.01`), capping
    /// parameter `nu = 1.0`, and
    /// `eta = max(0.5, ln(n_sample) / half_tolerance)`.
    /// The solver is not created here; `init_solver` does that later.
    ///
    /// # Panics
    ///
    /// Panics if the first component of `sample.shape()` is zero.
    pub fn init(sample: &'a Sample) -> Self {
        let n_sample = sample.shape().0;
        assert!(n_sample != 0);

        let half_tolerance = 0.005;
        let eta = 0.5_f64.max((n_sample as f64).ln() / half_tolerance);

        ERLPBoost {
            sample,
            dist: Vec::new(),
            gamma_hat: 1.0,
            gamma_star: f64::MIN,
            eta,
            half_tolerance,
            qp_model: None,
            hypotheses: Vec::new(),
            weights: Vec::new(),
            n_sample,
            nu: 1.0,
            terminated: usize::MAX,
            max_iter: usize::MAX,
        }
    }

    /// Creates the solver from the current `eta`, `n_sample` and `nu`.
    ///
    /// `nu` is first passed through `checker::check_nu`; the solver receives
    /// `1 / nu` as its upper bound.
    fn init_solver(&mut self) {
        checker::check_nu(self.nu, self.n_sample);
        let upper_bound = 1.0 / self.nu;
        let model = QPModel::init(self.eta, self.n_sample, upper_bound);
        self.qp_model = Some(RefCell::new(model));
    }

    /// Sets the capping parameter `nu` and refreshes `eta` accordingly.
    ///
    /// # Panics
    ///
    /// Panics unless `1 <= nu <= n_sample` (a NaN `nu` also panics).
    pub fn nu(mut self, nu: f64) -> Self {
        let n_sample = self.n_sample as f64;
        assert!((1.0..=n_sample).contains(&nu));
        self.nu = nu;
        self.regularization_param();
        self
    }

    /// Returns the iteration at which boosting terminated.
    ///
    /// The value is `usize::MAX` right after `init` and the iteration cap
    /// after `preprocess`, until the stopping criterion records the actual
    /// iteration.
    #[inline]
    pub fn terminated(&self) -> usize {
        self.terminated
    }

    /// Sets the tolerance; half of it is stored internally.
    ///
    /// `eta` is refreshed immediately so that it always agrees with the
    /// stored tolerance, exactly as `nu` does for the capping parameter.
    /// The value itself is validated later, in `preprocess`.
    #[inline]
    pub fn tolerance(mut self, tolerance: f64) -> Self {
        self.half_tolerance = tolerance / 2.0;
        self.regularization_param();
        self
    }

    /// Recomputes `eta = max(0.5, ln(n_sample / nu) / half_tolerance)`.
    #[inline]
    fn regularization_param(&mut self) {
        let ln_n_sample = (self.n_sample as f64 / self.nu).ln();
        self.eta = 0.5_f64.max(ln_n_sample / self.half_tolerance);
    }

    /// Returns the iteration cap
    /// `ceil(max(4 / half_tolerance, 8 ln(n_sample / nu) / half_tolerance^2))`.
    ///
    /// Only reads the parameters, hence `&self`.
    fn max_loop(&self) -> usize {
        let ln_n_sample = (self.n_sample as f64 / self.nu).ln();
        let linear_bound = 4.0 / self.half_tolerance;
        let quadratic_bound = 8.0 * ln_n_sample / self.half_tolerance.powi(2);
        // Float-to-integer `as` saturates, so an infinite bound maps to
        // `usize::MAX` rather than wrapping.
        linear_bound.max(quadratic_bound).ceil() as usize
    }
}
impl<F> ERLPBoost<'_, F>
where
    F: Classifier,
{
    /// Lowers `gamma_hat` to `edge(h) + entropy / eta` if that is smaller.
    ///
    /// Both the edge and the entropy term are evaluated on the current
    /// distribution through the `utils` helpers.
    #[inline]
    fn update_gamma_hat_mut(&mut self, h: &F) {
        let edge = utils::edge_of_hypothesis(self.sample, &self.dist[..], h);
        let entropy = utils::entropy_from_uni_distribution(&self.dist[..]);
        let objective = edge + entropy / self.eta;
        self.gamma_hat = self.gamma_hat.min(objective);
    }

    /// Sets `gamma_star` to `max_edge + entropy / eta`, where `max_edge` is
    /// the largest edge among the stored hypotheses on the current
    /// distribution.
    ///
    /// # Panics
    ///
    /// Panics if no hypothesis has been stored yet.
    fn update_gamma_star_mut(&mut self) {
        let max_edge = self
            .hypotheses
            .iter()
            .map(|h| utils::edge_of_hypothesis(self.sample, &self.dist, h))
            .reduce(f64::max)
            .expect("Failed to compute the max-edge");
        let entropy = utils::entropy_from_uni_distribution(&self.dist);
        self.gamma_star = max_edge + entropy / self.eta;
    }

    /// Feeds `clf` to the solver and replaces `dist` with the distribution
    /// the solver reports afterwards.
    ///
    /// # Panics
    ///
    /// Panics if the solver has not been created yet, i.e. `init_solver`
    /// (run by `preprocess`) was never called.
    fn update_distribution_mut(&mut self, clf: &F) {
        // Look the solver up once; the previous message blamed `.as_ref()`,
        // which cannot fail. The real cause is a missing `preprocess`.
        let model = self
            .qp_model
            .as_ref()
            .expect("`qp_model` is not initialized; call `preprocess` before updating the distribution");

        // The mutable guard is a temporary and is released at the end of
        // this statement, before the shared borrow below.
        model
            .borrow_mut()
            .update(self.sample, &mut self.dist[..], clf);
        self.dist = model.borrow().distribution();
    }
}
impl<F> Booster<F> for ERLPBoost<'_, F>
where
    F: Classifier + Clone,
{
    type Output = WeightedMajority<F>;

    /// Name of this boosting algorithm.
    fn name(&self) -> &str {
        "ERLPBoost"
    }

    /// Human-readable summary of the sample shape and the parameters.
    ///
    /// The tolerance is reported as twice the stored half-tolerance, and the
    /// capping parameter is shown together with its percentage of the
    /// sample size.
    fn info(&self) -> Option<Vec<(&str, String)>> {
        let (n_sample, n_feature) = self.sample.shape();
        let ratio = self.nu * 100f64 / n_sample as f64;
        let nu = utils::format_unit(self.nu);
        let info = Vec::from([
            ("# of examples", n_sample.to_string()),
            ("# of features", n_feature.to_string()),
            ("Tolerance", (2f64 * self.half_tolerance).to_string()),
            ("Max iteration", self.max_iter.to_string()),
            ("Capping (outliers)", format!("{nu} ({ratio: >7.3} %)")),
        ]);
        Some(info)
    }

    /// Resets the booster state and creates the solver.
    ///
    /// The distribution becomes uniform, the hypothesis list is emptied,
    /// `gamma_hat` / `gamma_star` are reset to `1.0` / `-1.0`, and the
    /// iteration cap and `eta` are recomputed from the current parameters.
    ///
    /// # Panics
    ///
    /// Panics if the stored half-tolerance is outside `[0, 1)`. The sample
    /// validity check and `init_solver` perform their own checks as well.
    fn preprocess<W>(
        &mut self,
        _weak_learner: &W,
    )
    where
        W: WeakLearner<Hypothesis = F>,
    {
        self.sample.is_valid_binary_instance();

        // Validate the tolerance before any quantity is derived from it
        // (the iteration cap and `eta` both divide by it).
        assert!((0.0..1.0).contains(&self.half_tolerance));

        let n_sample = self.sample.shape().0;
        let uni = 1.0 / n_sample as f64;
        self.dist = vec![uni; n_sample];

        self.max_iter = self.max_loop();
        self.terminated = self.max_iter;

        self.hypotheses = Vec::new();
        self.gamma_hat = 1.0;
        self.gamma_star = -1.0;

        self.regularization_param();
        self.init_solver();
    }

    /// Runs one boosting round.
    ///
    /// Returns `Break(max_iter)` when `iteration` exceeds the iteration cap,
    /// `Break(iteration)` when `gamma_hat - gamma_star <= half_tolerance`
    /// (the iteration is also recorded in `terminated`), and `Continue`
    /// otherwise. A hypothesis that triggers the stopping criterion is not
    /// stored.
    fn boost<W>(
        &mut self,
        weak_learner: &W,
        iteration: usize,
    ) -> ControlFlow<usize>
    where
        W: WeakLearner<Hypothesis = F>,
    {
        if self.max_iter < iteration {
            return ControlFlow::Break(self.max_iter);
        }

        let h = weak_learner.produce(self.sample, &self.dist[..]);

        // The stopping criterion is checked with the fresh hypothesis
        // folded into `gamma_hat`, before the distribution is touched.
        self.update_gamma_hat_mut(&h);
        let gap = self.gamma_hat - self.gamma_star;
        if gap <= self.half_tolerance {
            self.terminated = iteration;
            return ControlFlow::Break(iteration);
        }

        self.update_distribution_mut(&h);
        self.hypotheses.push(h);
        // Must run after the push: it needs at least one stored hypothesis.
        self.update_gamma_star_mut();

        ControlFlow::Continue(())
    }

    /// Fetches the hypothesis weights from the solver, stores them in
    /// `weights`, and returns the weighted-majority classifier built from
    /// them and the stored hypotheses.
    ///
    /// # Panics
    ///
    /// Panics if the solver has not been created yet, i.e. `preprocess`
    /// was never called.
    fn postprocess<W>(
        &mut self,
        _weak_learner: &W,
    ) -> Self::Output
    where
        W: WeakLearner<Hypothesis = F>,
    {
        self.weights = self
            .qp_model
            .as_ref()
            .expect("`qp_model` is not initialized; call `preprocess` before `postprocess`")
            .borrow_mut()
            .weight()
            .collect::<Vec<_>>();
        WeightedMajority::from_slices(&self.weights[..], &self.hypotheses[..])
    }
}
impl<H> Research for ERLPBoost<'_, H>
where
    H: Classifier + Clone,
{
    type Output = WeightedMajority<H>;

    /// Returns the weighted-majority classifier for the current state:
    /// the weights reported by the solver paired with the hypotheses
    /// stored so far. The booster's own `weights` field is left untouched.
    ///
    /// # Panics
    ///
    /// Panics if the solver has not been created yet, i.e. `preprocess`
    /// was never called.
    fn current_hypothesis(&self) -> Self::Output {
        let weights = self
            .qp_model
            .as_ref()
            .expect("`qp_model` is not initialized; call `preprocess` before `current_hypothesis`")
            .borrow_mut()
            .weight()
            .collect::<Vec<_>>();
        WeightedMajority::from_slices(&weights[..], &self.hypotheses[..])
    }
}