use crate::core::math::Dot;
use crate::core::problem::EvalCounts;
use crate::core::state::{CountsMirror, GradientState, State};
pub struct LbfgsState<V> {
pub(crate) param: V,
pub(crate) cost: Option<f64>,
pub(crate) gradient: Option<V>,
pub(crate) m_capacity: usize,
pub(crate) ws: Vec<V>,
pub(crate) wy: Vec<V>,
pub(crate) sy: Vec<f64>,
pub(crate) ss: Vec<f64>,
pub(crate) theta: f64,
pub(crate) iter: u64,
pub(crate) cost_evals: u64,
pub(crate) gradient_evals: u64,
pub(crate) work: Option<LbfgsbWork>,
}
pub(crate) struct LbfgsbWork {
pub(crate) wn: Vec<f64>,
pub(crate) wn1: Vec<f64>,
pub(crate) wt: Vec<f64>,
pub(crate) z: Vec<f64>,
pub(crate) r: Vec<f64>,
pub(crate) d: Vec<f64>,
pub(crate) t_buf: Vec<f64>,
pub(crate) xp: Vec<f64>,
pub(crate) wa_p: Vec<f64>,
pub(crate) wa_c: Vec<f64>,
pub(crate) wa_wbp: Vec<f64>,
pub(crate) wa_v: Vec<f64>,
pub(crate) iwhere: Vec<i8>,
pub(crate) index: Vec<usize>,
pub(crate) indx2: Vec<usize>,
pub(crate) cnstnd: bool,
pub(crate) boxed: bool,
pub(crate) updatd: bool,
pub(crate) iupdat: u32,
pub(crate) dnorm: f64,
pub(crate) gdold: f64,
pub(crate) nfree: usize,
}
impl LbfgsbWork {
pub(crate) fn new(n: usize, m: usize) -> Self {
let two_m = 2 * m;
Self {
wn: vec![0.0; two_m * two_m],
wn1: vec![0.0; two_m * two_m],
wt: vec![0.0; m * m],
z: vec![0.0; n],
r: vec![0.0; n],
d: vec![0.0; n],
t_buf: vec![0.0; n],
xp: vec![0.0; n],
wa_p: vec![0.0; two_m],
wa_c: vec![0.0; two_m],
wa_wbp: vec![0.0; two_m],
wa_v: vec![0.0; two_m],
iwhere: vec![0; n],
index: (0..n).collect(),
indx2: vec![0; n],
cnstnd: false,
boxed: true,
updatd: false,
iupdat: 0,
dnorm: 0.0,
gdold: 0.0,
nfree: n,
}
}
pub(crate) fn reset_history(&mut self) {
self.iupdat = 0;
self.updatd = false;
}
}
impl<V> LbfgsState<V> {
pub fn new(param: V, m_capacity: usize) -> Self {
assert!(m_capacity >= 1, "m_capacity must be ≥ 1");
let mm = m_capacity * m_capacity;
Self {
param,
cost: None,
gradient: None,
m_capacity,
ws: Vec::with_capacity(m_capacity),
wy: Vec::with_capacity(m_capacity),
sy: vec![0.0; mm],
ss: vec![0.0; mm],
theta: 1.0,
iter: 0,
cost_evals: 0,
gradient_evals: 0,
work: None,
}
}
#[allow(dead_code)]
pub(crate) fn col(&self) -> usize {
self.ws.len()
}
pub(crate) fn append_pair(&mut self, s: V, y: V) -> bool
where
V: Dot,
{
let sy_dot = s.dot(&y);
let yy_dot = y.dot(&y);
if !(sy_dot > 0.0 && sy_dot.is_finite() && yy_dot.is_finite()) {
return false;
}
let m = self.m_capacity;
if self.ws.len() == m {
self.ws.remove(0);
self.wy.remove(0);
for i in 0..m - 1 {
for j in 0..m - 1 {
self.sy[i * m + j] = self.sy[(i + 1) * m + (j + 1)];
self.ss[i * m + j] = self.ss[(i + 1) * m + (j + 1)];
}
}
for i in 0..m {
self.sy[i * m + (m - 1)] = 0.0;
self.sy[(m - 1) * m + i] = 0.0;
self.ss[i * m + (m - 1)] = 0.0;
self.ss[(m - 1) * m + i] = 0.0;
}
}
let new_idx = self.ws.len();
for i in 0..new_idx {
let s_i_y_new = self.ws[i].dot(&y);
let s_new_y_i = s.dot(&self.wy[i]);
let s_i_s_new = self.ws[i].dot(&s);
self.sy[i * m + new_idx] = s_i_y_new;
self.sy[new_idx * m + i] = s_new_y_i;
self.ss[i * m + new_idx] = s_i_s_new;
self.ss[new_idx * m + i] = s_i_s_new;
}
self.sy[new_idx * m + new_idx] = sy_dot;
self.ss[new_idx * m + new_idx] = s.dot(&s);
self.theta = yy_dot / sy_dot;
self.ws.push(s);
self.wy.push(y);
true
}
}
impl<V> State for LbfgsState<V> {
type Param = V;
type Float = f64;
fn iter(&self) -> u64 {
self.iter
}
fn increment_iter(&mut self) {
self.iter += 1;
}
fn cost_evals(&self) -> u64 {
self.cost_evals
}
fn param(&self) -> &V {
&self.param
}
fn cost(&self) -> f64 {
self.cost
.expect("LbfgsState::cost read before Solver::init populated it")
}
}
impl<V> GradientState for LbfgsState<V> {
fn gradient(&self) -> Option<&V> {
self.gradient.as_ref()
}
fn gradient_evals(&self) -> u64 {
self.gradient_evals
}
}
impl<V> CountsMirror for LbfgsState<V> {
fn mirror(&mut self, delta: &EvalCounts) {
self.cost_evals = delta.cost_evals + delta.residual_evals;
self.gradient_evals = delta.gradient_evals + delta.jacobian_evals + delta.hessian_evals;
}
}
#[cfg(test)]
#[allow(clippy::identity_op, clippy::erasing_op)]
mod tests {
use super::*;
#[test]
fn new_state_is_empty() {
let s = LbfgsState::<Vec<f64>>::new(vec![0.0; 4], 5);
assert_eq!(s.col(), 0);
assert_eq!(s.m_capacity, 5);
assert_eq!(s.theta, 1.0);
assert!(s.cost.is_none());
assert!(s.gradient.is_none());
assert_eq!(s.ws.len(), 0);
assert_eq!(s.wy.len(), 0);
assert_eq!(s.sy.len(), 25);
assert_eq!(s.ss.len(), 25);
}
#[test]
fn first_append_sets_theta_and_diagonal() {
let mut state = LbfgsState::<Vec<f64>>::new(vec![0.0, 0.0], 3);
let s = vec![1.0, 2.0]; let y = vec![3.0, 4.0]; let ok = state.append_pair(s, y);
assert!(ok);
assert_eq!(state.col(), 1);
assert_eq!(state.theta, 25.0 / 11.0);
assert_eq!(state.sy[0], 11.0); assert_eq!(state.ss[0], 5.0); }
#[test]
fn second_append_fills_off_diagonal_gram_blocks() {
let mut state = LbfgsState::<Vec<f64>>::new(vec![0.0; 2], 3);
let s1 = vec![1.0, 0.0];
let y1 = vec![2.0, 0.0];
let s2 = vec![0.0, 3.0];
let y2 = vec![0.0, 4.0];
state.append_pair(s1, y1);
state.append_pair(s2, y2);
assert_eq!(state.sy[0 * 3 + 0], 2.0);
assert_eq!(state.sy[0 * 3 + 1], 0.0);
assert_eq!(state.sy[1 * 3 + 0], 0.0);
assert_eq!(state.sy[1 * 3 + 1], 12.0);
assert_eq!(state.ss[0 * 3 + 0], 1.0);
assert_eq!(state.ss[0 * 3 + 1], 0.0);
assert_eq!(state.ss[1 * 3 + 0], 0.0);
assert_eq!(state.ss[1 * 3 + 1], 9.0);
assert_eq!(state.theta, 16.0 / 12.0);
}
#[test]
fn appending_beyond_capacity_drops_oldest() {
let mut state = LbfgsState::<Vec<f64>>::new(vec![0.0], 2);
let s1 = vec![1.0];
let y1 = vec![2.0];
let s2 = vec![3.0];
let y2 = vec![4.0];
let s3 = vec![5.0];
let y3 = vec![6.0];
state.append_pair(s1.clone(), y1.clone());
state.append_pair(s2.clone(), y2.clone());
state.append_pair(s3.clone(), y3.clone());
assert_eq!(state.col(), 2);
assert_eq!(state.ws[0], s2);
assert_eq!(state.ws[1], s3);
assert_eq!(state.wy[0], y2);
assert_eq!(state.wy[1], y3);
let m = 2;
assert_eq!(state.sy[0 * m + 0], 12.0);
assert_eq!(state.sy[0 * m + 1], 18.0);
assert_eq!(state.sy[1 * m + 0], 20.0);
assert_eq!(state.sy[1 * m + 1], 30.0);
}
#[test]
fn curvature_failure_leaves_state_untouched() {
let mut state = LbfgsState::<Vec<f64>>::new(vec![0.0, 0.0], 3);
let s = vec![1.0, 0.0];
let y = vec![-1.0, 0.0];
let ok = state.append_pair(s, y);
assert!(!ok);
assert_eq!(state.col(), 0);
assert_eq!(state.theta, 1.0);
}
#[test]
fn state_implements_state_and_gradient_state_traits() {
let s: LbfgsState<Vec<f64>> = LbfgsState::new(vec![1.0, 2.0], 5);
let p: &Vec<f64> = State::param(&s);
assert_eq!(p, &vec![1.0, 2.0]);
assert!(GradientState::gradient(&s).is_none());
assert_eq!(GradientState::gradient_evals(&s), 0);
assert_eq!(State::iter(&s), 0);
assert_eq!(State::cost_evals(&s), 0);
}
}