use log::Level::*;
#[allow(unused_imports)]
use log::{debug, info, log_enabled, trace, warn};
use cpu_time::ProcessTime;
use std::time::SystemTime;
use rand::distributions::Distribution;
use rand::SeedableRng;
use ndarray::{Array, Dimension};
use rand_xoshiro::Xoshiro256PlusPlus;
use crate::monitor::*;
use crate::types::*;
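
/// Provides a stochastic gradient descent with the SVRG variance-reduction scheme
/// (Johnson & Zhang, "Accelerating Stochastic Gradient Descent using Predictive
/// Variance Reduction", NIPS 2013).
/// Each outer iteration computes the full gradient at the current position, then runs
/// `nb_mini_batch` inner steps in which a single-term gradient is corrected by that
/// full-gradient snapshot.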
pub struct SVRGDescent {
    /// Random number generator used to sample the mini-batch terms.
    rng: Xoshiro256PlusPlus,
    /// Number of inner mini-batch steps per outer iteration.
    nb_mini_batch: usize,
    /// Step size (learning rate) used for the inner updates.
    step_size: f64,
}
impl SVRGDescent {
    /// Builds an SVRG descent from the number of inner mini-batch steps per
    /// outer iteration and a constant step size.
    pub fn new(nb_mini_batch: usize, step_size: f64) -> SVRGDescent {
        trace!(
            " nb_mini_batch {:?} step_size {:2.4E} ",
            nb_mini_batch,
            step_size
        );
        SVRGDescent {
            rng: Xoshiro256PlusPlus::seed_from_u64(4664397),
            nb_mini_batch,
            step_size,
        }
    }
    /// Reseeds the internal random number generator, typically to make runs reproducible.
    pub fn seed(&mut self, seed: [u8; 32]) {
        self.rng = Xoshiro256PlusPlus::from_seed(seed);
    }

    /// Step size used at inner step `_j` (constant in this implementation).
    fn get_step_size_at_jstep(&self, _j: usize) -> f64 {
        self.step_size
    }

    /// Number of inner mini-batch steps at outer iteration `_j` (constant in this implementation).
    fn get_nb_small_mini_batches(&self, _j: usize) -> usize {
        self.nb_mini_batch
    }
}
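
// A minimal usage sketch (not compiled here). It assumes some objective type,
// here a hypothetical `LeastSquaresSum`, implementing `SummationC1<Ix1>` over its
// data terms, and that the `Minimizer` trait is in scope:
//
//     let objective = LeastSquaresSum::new(&observations);
//     let mut descent = SVRGDescent::new(200, 0.1);
//     descent.seed([0u8; 32]);
//     let initial_position = ndarray::Array1::<f64>::zeros(dimension);
//     let solution = descent.minimize(&objective, &initial_position, Some(50));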
impl<D: Dimension, F: SummationC1<D>> Minimizer<D, F, usize> for SVRGDescent {
type Solution = Solution<D>;
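    // One call to `minimize` runs up to `max_iterations` outer iterations. Each outer
    // iteration j performs `get_nb_small_mini_batches(j)` inner steps of the form
    //   x <- x - step * ( grad_i(x) - grad_i(x_ref) + full_grad(x_ref) )
    // where i is a term index drawn uniformly at random and x_ref is the position held
    // at the start of the outer iteration: this is the SVRG variance-reduced update.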
    fn minimize(
        &self,
        function: &F,
        initial_position: &Array<f64, D>,
        max_iterations: Option<usize>,
    ) -> Solution<D> {
        let cpu_start = ProcessTime::now();
        let sys_now = SystemTime::now();
        let mut position = initial_position.clone();
        let mut value = function.value(&position);
        let nb_max_iterations = max_iterations.expect("SVRGDescent::minimize requires max_iterations");
        // Last variance-reduced direction, kept around for norm monitoring.
        let mut direction: Array<f64, D> = position.clone();
        direction.fill(0.);
if log_enabled!(Info) {
info!("Starting with y = {:e} for x = {:?}", value, position);
} else {
info!("Starting with y = {:e}", value);
}
trace!("nb_max_iterations {:?}", nb_max_iterations);
        let mut iteration: usize = 0;
        let nb_terms = function.terms();
        let mut monitoring = IterationRes::<D>::new(nb_max_iterations, SolMode::Last);
        // Buffers for the single-term gradients at the current position and at the reference point.
        let mut term_gradient_current: Array<f64, D> = position.clone();
        term_gradient_current.fill(0.);
        let mut term_gradient_origin: Array<f64, D> = position.clone();
        term_gradient_origin.fill(0.);
        // Clone the generator: `minimize` takes `&self`, so the stored state is left untouched.
        let mut rng = self.rng.clone();
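        // Outer loop: each pass takes the current position as the reference point,
        // evaluates the full (batch) gradient there, then runs the inner mini-batch steps.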
        loop {
            let batch_gradient = function.gradient(&position);
            let position_before_mini_batch = position.clone();
            let n_j = self.get_nb_small_mini_batches(iteration);
            for _k in 0..n_j {
                // Draw a term index uniformly in 0..nb_terms.
                let xsi: f64 = rand_distr::Standard.sample(&mut rng);
                let term = (nb_terms as f64 * xsi).floor() as usize;
                function.partial_gradient(&position, &[term], &mut term_gradient_current);
                function.partial_gradient(
                    &position_before_mini_batch,
                    &[term],
                    &mut term_gradient_origin,
                );
                // Variance-reduced direction: grad_i(x) - grad_i(x_ref) + full_grad(x_ref).
                direction = &term_gradient_current - &term_gradient_origin + &batch_gradient;
                position = position - self.get_step_size_at_jstep(iteration) * &direction;
            }
            iteration += 1;
value = function.value(&position);
let gradnorm = norm_l2(&direction);
monitoring.push(value, &position, gradnorm);
if log_enabled!(Debug) {
                trace!(" direction norm {:2.6E} ", &gradnorm);
debug!("\n\n Iteration {:?} y = {:2.4E}", iteration, value);
}
if iteration >= nb_max_iterations {
break;
}
        }
        info!(
            "\n SVRGDescent::minimize ; sys time(ms) {:?} cpu time(ms) {:?}",
            sys_now.elapsed().unwrap().as_millis(),
            cpu_start.elapsed().as_millis()
        );
        info!(
            "Reached the maximal number of iterations ({:?}), stopping optimization",
            nb_max_iterations
        );
        let rank = monitoring.check_monoticity();
        info!(" monotone convergence from rank : {:?}", rank);
        Solution::new(position, value)
    }
}