miniboosts 0.2.1

//! This file defines some options of MLPBoost.

use crate::{
    Sample,
    Classifier,
    common::{utils, checker},
};


const SUB_TOLERANCE: f64 = 1e-9;


/// FWType updates.
/// These options correspond to the Frank-Wolfe strategies.
#[derive(Clone, Copy)]
pub enum FWType {
    /// Classic step size, `2 / (t + 2)`.
    Classic,

    /// Short-step size,
    /// Adopt the step size that minimizes the strongly-smooth bound.
    ShortStep,

    /// Line-search step size,
    /// Adopt the best step size on the descent direction.
    LineSearch,

    // /// Pairwise strategy, 
    // /// See [this paper](https://arxiv.org/abs/1511.05932). 
    // Pairwise,
}


pub(crate) struct FrankWolfe {
    eta: f64, // Strongly-smooth parameter
    nu: f64,
    fw_type: FWType,
}


impl FrankWolfe {
    /// Create a new FrankWolfe instance
    pub(crate) fn new(eta: f64, nu: f64, fw_type: FWType) -> Self {
        Self { eta, nu, fw_type, }
    }


    /// Set `self.eta`.
    pub(crate) fn eta(&mut self, eta: f64) {
        self.eta = eta;
    }


    /// Set `nu`.
    pub(crate) fn nu(&mut self, nu: f64) {
        self.nu = nu;
    }


    /// Update the Frank-Wolfe type.
    /// Currently, you can choose types from below:
    /// - Classic. `1.0 / t + 1.0` for `t >= 1`.
    /// - ShortStep. The step size that minimizes the strongly-smooth bound.
    /// - LineSearch. Optimal step size that minimizes the objective.
    /// - Pairwise. Pairwise update.
    pub(crate) fn fw_type(&mut self, fw_type: FWType) {
        self.fw_type = fw_type;
    }


    /// Returns the next weights on hypotheses
    pub(crate) fn next_iterate<H>(
        &self,
        iteration: usize,
        sample: &Sample,
        dist: &[f64],
        hypotheses: &[H],
        position_of_new_one: usize,
        weights: Vec<f64>,
    ) -> Vec<f64>
        where H: Classifier,
    {
        let n_hypotheses = hypotheses.len();
        assert!((0..n_hypotheses).contains(&position_of_new_one));
        match self.fw_type {
            FWType::Classic
                => self.classic(iteration, position_of_new_one, weights),
            FWType::ShortStep
                => self.short_step(
                    sample, dist,
                    hypotheses, position_of_new_one, weights,
                ),
            FWType::LineSearch
                => self.line_search(
                    sample, hypotheses, position_of_new_one, weights,
                ),
        }
    }


    fn classic(
        &self,
        iteration: usize,
        position_of_new_one: usize,
        weights: Vec<f64>,
    ) -> Vec<f64>
    {
        // Compute the step-size
        let step_size = 2.0_f64 / ((iteration + 1) as f64);

        // Update the weights
        interior_point(step_size, position_of_new_one, weights)
    }


    fn short_step<H>(
        &self,
        sample: &Sample,
        dist: &[f64],
        hypotheses: &[H],
        position_of_new_one: usize,
        weights: Vec<f64>,
    ) -> Vec<f64>
        where H: Classifier,
    {
        if hypotheses.len() == 1 {
            return vec![1.0];
        }


        let h = &hypotheses[position_of_new_one];

        let mut numer: f64 = 0.0;
        let mut denom: f64 = f64::MIN;

        let old_margins = utils::margins_of_weighted_hypothesis(
            sample, &weights[..], hypotheses,
        );
        let new_margins = utils::margins_of_hypothesis(sample, h);

        new_margins.into_iter()
            .zip(old_margins)
            .zip(dist)
            .for_each(|((ynew, yold), &d)| {
                let diff = ynew - yold;
                numer += d * diff;
                denom = denom.max(diff.abs());
            });

        let step = numer / (self.eta * denom.powi(2));

        // Clip the step size to `[0, 1]`.
        let step_size = step.max(0.0_f64).min(1.0_f64);


        // Update the weights
        interior_point(step_size, position_of_new_one, weights)
    }


    fn line_search<H>(
        &self,
        sample: &Sample,
        hypotheses: &[H],
        position_of_new_one: usize,
        mut weights: Vec<f64>,
    ) -> Vec<f64>
        where H: Classifier,
    {
        // base: w
        // dir:  e_h - w
        let base = weights.clone();
        let dir  = weights.iter()
            .copied()
            .enumerate()
            .map(|(j, w)|
                if j == position_of_new_one { 1.0 - w } else { -w }
            )
            .collect::<Vec<_>>();

        // A(e_h - w)
        let dir_margins = utils::margins_of_weighted_hypothesis(
            sample, &dir[..], hypotheses,
        );
        // Aw
        let base_margins = utils::margins_of_weighted_hypothesis(
            sample, &base[..], hypotheses,
        );


        // Check the case where the step size is `1`.
        let dist = utils::exp_distribution(
            self.eta, self.nu, sample, &dir[..], hypotheses,
        );


        // `dot` is `d * A (e_h - w)`
        let dot = utils::inner_product(&dist[..], &dir_margins[..]);


        if dot <= 0.0 {
            let n_weights = weights.len();
            weights = vec![0.0; n_weights];
            weights[position_of_new_one] = 1.0;
            return weights;
        }



        let mut ub = 1.0;
        let mut lb = 0.0;
        while ub - lb > SUB_TOLERANCE {
            let step_size = (lb + ub) / 2.0;

            let margins = base_margins.iter()
                .zip(&dir_margins[..])
                .map(|(&b, &d)| b + step_size * d);
            let dist = utils::exp_distribution_from_margins(
                self.eta, self.nu, margins,
            );


            // Compute the gradient for the direction `dir`.
            let dot = utils::inner_product(&dist[..], &dir_margins[..]);

            if dot < 0.0 {
                lb = step_size;
            } else if dot > 0.0 {
                ub = step_size;
            } else {
                break;
            }
        }

        // Update the weights
        let step_size = (lb + ub) / 2.0;


        interior_point(step_size, position_of_new_one, base)
        // base.into_iter()
        //     .zip(dir)
        //     .map(|(b, d)| b + step_size * d)
        //     .collect::<Vec<_>>()
    }
}


/// Take the interior point of the given two arrays.
pub(crate) fn interior_point(
    step_size: f64,
    new_basis: usize,
    base: Vec<f64>,
) -> Vec<f64>
{
    checker::check_stepsize(step_size);
    base.into_iter()
        .enumerate()
        .map(|(j, b)| {
            let dir = if j == new_basis { 1.0 - b } else { -b };

            b + step_size * dir
        })
        .collect()
}