use num_traits::Float;
pub mod vec_ops;
#[cfg(test)]
mod tests;
pub trait LbfgsPrecision: Float {
const DEFAULT_SY_TOLERANCE: Self;
const ABS_TOL: Self;
const REL_TOL: Self;
}
impl LbfgsPrecision for f64 {
const DEFAULT_SY_TOLERANCE: f64 = 1e-10;
const ABS_TOL: f64 = 1e-8;
const REL_TOL: f64 = 1e-10;
}
impl LbfgsPrecision for f32 {
const DEFAULT_SY_TOLERANCE: f32 = 1e-8;
const ABS_TOL: f32 = 1e-5;
const REL_TOL: f32 = 1e-5;
}
#[derive(Debug)]
pub struct Lbfgs<T = f64>
where
T: LbfgsPrecision + std::iter::Sum<T>,
{
active_size: usize,
gamma: T,
s: Vec<Vec<T>>,
y: Vec<Vec<T>>,
alpha: Vec<T>,
rho: Vec<T>,
cbfgs_alpha: T,
cbfgs_epsilon: T,
sy_epsilon: T,
old_state: Vec<T>,
old_g: Vec<T>,
first_old: bool,
}
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum UpdateStatus {
UpdateOk,
Rejection,
}
impl<T> Lbfgs<T>
where
T: LbfgsPrecision + std::iter::Sum<T>,
{
pub fn new(problem_size: usize, buffer_size: usize) -> Lbfgs<T> {
debug_assert!(problem_size > 0);
debug_assert!(buffer_size > 0);
Lbfgs {
active_size: 0,
gamma: T::one(),
s: vec![vec![T::zero(); problem_size]; buffer_size + 1], y: vec![vec![T::zero(); problem_size]; buffer_size + 1], alpha: vec![T::zero(); buffer_size],
rho: vec![T::zero(); buffer_size + 1],
cbfgs_alpha: T::zero(),
cbfgs_epsilon: T::zero(),
sy_epsilon: T::DEFAULT_SY_TOLERANCE,
old_state: vec![T::zero(); problem_size],
old_g: vec![T::zero(); problem_size],
first_old: true,
}
}
pub fn with_cbfgs_alpha(mut self, alpha: T) -> Self {
debug_assert!(alpha >= T::zero(), "Negative alpha");
self.cbfgs_alpha = alpha;
self
}
pub fn with_cbfgs_epsilon(mut self, epsilon: T) -> Self {
debug_assert!(epsilon >= T::zero(), "sy_epsilon must be non-negative");
self.cbfgs_epsilon = epsilon;
self
}
pub fn with_sy_epsilon(mut self, sy_epsilon: T) -> Self {
debug_assert!(sy_epsilon >= T::zero(), "sy_epsilon must be non-negative");
self.sy_epsilon = sy_epsilon;
self
}
pub fn reset(&mut self) {
self.active_size = 0;
self.first_old = true;
}
pub fn apply_hessian(&mut self, g: &mut [T]) {
debug_assert!(g.len() == self.old_g.len());
if self.active_size == 0 {
return;
}
let active_s = &self.s[0..self.active_size];
let active_y = &self.y[0..self.active_size];
let rho = &self.rho[0..self.active_size];
let alpha = &mut self.alpha;
let q = g;
for (s_k, (y_k, (rho_k, alpha_k))) in active_s
.iter()
.zip(active_y.iter().zip(rho.iter().zip(alpha.iter_mut())))
{
let a = *rho_k * vec_ops::inner_product(s_k, q);
*alpha_k = a;
vec_ops::inplace_vec_add(q, y_k, -a);
}
vec_ops::scalar_mult(q, self.gamma);
let r = q;
for (s_k, (y_k, (rho_k, alpha_k))) in active_s
.iter()
.zip(active_y.iter().zip(rho.iter().zip(alpha.iter())))
.rev()
{
let beta = *rho_k * vec_ops::inner_product(y_k, r);
vec_ops::inplace_vec_add(r, s_k, *alpha_k - beta);
}
}
fn new_s_and_y_valid(&mut self, g: &[T]) -> bool {
let s = self.s.last().unwrap();
let y = self.y.last().unwrap();
let rho = self.rho.last_mut().unwrap();
let ys = vec_ops::inner_product(s, y);
let norm_s_squared = vec_ops::inner_product(s, s);
*rho = T::one() / ys;
if norm_s_squared <= T::min_positive_value()
|| (self.sy_epsilon > T::zero() && ys <= self.sy_epsilon)
{
false
} else if self.cbfgs_epsilon > T::zero() && self.cbfgs_alpha > T::zero() {
let lhs_cbfgs = ys / norm_s_squared;
let rhs_cbfgs = self.cbfgs_epsilon * vec_ops::norm2(g).powf(self.cbfgs_alpha);
lhs_cbfgs > rhs_cbfgs
} else {
true
}
}
pub fn update_hessian(&mut self, g: &[T], state: &[T]) -> UpdateStatus {
debug_assert!(g.len() == self.old_state.len());
debug_assert!(state.len() == self.old_state.len());
if self.first_old {
self.first_old = false;
self.old_state.copy_from_slice(state);
self.old_g.copy_from_slice(g);
return UpdateStatus::UpdateOk;
}
vec_ops::difference_and_save(self.s.last_mut().unwrap(), &state, &self.old_state);
vec_ops::difference_and_save(self.y.last_mut().unwrap(), &g, &self.old_g);
if !self.new_s_and_y_valid(g) {
return UpdateStatus::Rejection;
}
self.old_state.copy_from_slice(state);
self.old_g.copy_from_slice(g);
self.s.rotate_right(1);
self.y.rotate_right(1);
self.rho.rotate_right(1);
self.gamma = (T::one() / self.rho[0]) / vec_ops::inner_product(&self.y[0], &self.y[0]);
self.active_size = (self.s.len() - 1).min(self.active_size + 1);
UpdateStatus::UpdateOk
}
}