1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
//! # lbfgs
//!
//! The `L-BFGS` is an Hessian approximation algorithm commonly used by optimization algorithms in
//! the family of quasi-Newton methods that approximates the Broyden–Fletcher–Goldfarb–Shanno (BFGS)
//! algorithm using a limited amount of computer memory. In this implementation extra condition are
//! added to have convergence properties for non-convex problems, based on the [C-BFGS conditions],
//! together with basic checks on the local curvature.
//!
//! [C-BFGS conditions]: https://pdfs.semanticscholar.org/5b90/45b7d27a53b1e3c3b3f0dc6aab908cc3e0b2.pdf
//!
//! # Example
//!
//! Create a fully featured instance of the L-BFGS algorithm with curvature and C-BFGS checks
//! enabled.
//!
//! ```
//! use lbfgs::*;
//!
//! fn main() {
//!     // Problem size and the number of stored vectors in L-BFGS cannot be zero
//!     let problem_size = NonZeroUsize::new(3).unwrap();
//!     let lbfgs_memory_size = NonZeroUsize::new(5).unwrap();
//!
//!     // Create the L-BFGS instance with curvature and C-BFGS checks enabled
//!     let mut lbfgs = Lbfgs::new(problem_size, lbfgs_memory_size)
//!         .with_sy_epsilon(1e-8)     // L-BFGS acceptance condition on s'*y > sy_espsilon
//!         .with_cbfgs_alpha(1.0)     // C-BFGS condition:
//!         .with_cbfgs_epsilon(1e-4); // y'*s/||s||^2 > epsilon * ||grad(x)||^alpha
//!
//!     // Starting value is always accepted (no s or y vectors yet)
//!     assert_eq!(
//!         lbfgs.update_hessian(&[0.0, 0.0, 0.0], &[0.0, 0.0, 0.0]),
//!         UpdateStatus::UpdateOk
//!     );
//!
//!     // Rejected because of CBFGS condition
//!     assert_eq!(
//!         lbfgs.update_hessian(&[-0.838, 0.260, 0.479], &[-0.5, 0.6, -1.2]),
//!         UpdateStatus::Rejection
//!     );
//!
//!     // This will fail because y'*s == 0 (curvature condition)
//!     assert_eq!(
//!         lbfgs.update_hessian(
//!             &[-0.5, 0.6, -1.2],
//!             &[0.419058177461747, 0.869843029576958, 0.260313940846084]
//!         ),
//!         UpdateStatus::Rejection
//!     );
//!
//!     // A proper update that will be accepted
//!     assert_eq!(
//!         lbfgs.update_hessian(&[-0.5, 0.6, -1.2], &[0.1, 0.2, -0.3]),
//!         UpdateStatus::UpdateOk
//!     );
//!
//!     // Apply Hessian approximation on a gradient
//!     let mut g = [-3.1, 1.5, 2.1];
//!     let correct_dir = [-1.100601247872944, -0.086568349404424, 0.948633011911515];
//!
//!     lbfgs.apply_hessian(&mut g);
//!
//!     assert!((g[0] - correct_dir[0]).abs() < 1e-12);
//!     assert!((g[1] - correct_dir[1]).abs() < 1e-12);
//!     assert!((g[2] - correct_dir[2]).abs() < 1e-12);
//! }
//! ```
//!
//! # Errors
//!
//! `update_hessian` will give errors if the C-BFGS or L-BFGS curvature conditions are not met.
//!
//! # Panics
//!
//! `with_sy_epsilon`, `with_cbfgs_alpha`, and `with_cbfgs_epsilon` will panic if given negative
//! values.
//!
//! `update_hessian` and `apply_hessian` will panic if given slices which are not the same length
//! as the `problem_size`.
//!

extern crate num;
pub use std::num::NonZeroUsize;

pub mod vec_ops;

#[cfg(test)]
mod tests;

/// The default `sy_epsilon`
pub const DEFAULT_SY_EPSILON: f64 = 1e-10;

/// LBFGS Buffer
///
/// The Limited-memory BFGS algorithm is used to estimate curvature information for the
/// gradient of a function as well as other operators and is often used in numerical
/// optimization and numerical methods in general.
///
/// `Lbfgs` maintains a buffer of pairs `(s,y)` and values `rho` (inverse of inner products
/// of `s` and `y`)
///
///
#[derive(Debug)]
pub struct Lbfgs {
    /// The number of vectors in s and y that are currently in use
    active_size: usize,
    /// Used to warm-start the Hessian estimation with H_0 = gamma * I
    gamma: f64,
    /// s holds the vectors of state difference s_k = x_{k+1} - x_k, s_0 holds the most recent s
    s: Vec<Vec<f64>>,
    /// y holds the vectors of the function g (usually cost function gradient) difference:
    /// y_k = g_{k+1} - g_k, y_0 holds the most recent y
    y: Vec<Vec<f64>>,
    /// Intermediary storage for the forward L-BFGS pass
    alpha: Vec<f64>,
    /// Intermediary storage for the forward L-BFGS pass
    rho: Vec<f64>,
    /// The alpha parameter of the C-BFGS criterion
    cbfgs_alpha: f64,
    /// The epsilon parameter of the C-BFGS criterion
    cbfgs_epsilon: f64,
    /// Limit on the inner product s'*y for acceptance in the buffer
    sy_epsilon: f64,
    /// Holds the state of the last `update_hessian`, used to calculate the `s_k` vectors
    old_state: Vec<f64>,
    /// Holds the g of the last `update_hessian`, used to calculate the `y_k` vectors
    old_g: Vec<f64>,
    /// Check to see if the `old_*` variables have valid data
    first_old: bool,
}

#[derive(Debug, Copy, Clone, PartialEq)]
pub enum UpdateStatus {
    /// The g and state was accepted to update the Hessian estimate
    UpdateOk,
    /// The g and state was rejected by the C-BFGS criteria
    Rejection,
}

impl Lbfgs {
    /// Create a new L-BFGS instance with a specific problem and L-BFGS buffer size
    pub fn new(problem_size: NonZeroUsize, buffer_size: NonZeroUsize) -> Lbfgs {
        let problem_size = problem_size.get();
        let buffer_size = buffer_size.get();

        Lbfgs {
            active_size: 0,
            gamma: 1.0,
            s: vec![vec![0.0; problem_size]; buffer_size + 1], // +1 for the temporary checking area
            y: vec![vec![0.0; problem_size]; buffer_size + 1], // +1 for the temporary checking area
            alpha: vec![0.0; buffer_size],
            rho: vec![0.0; buffer_size + 1],
            cbfgs_alpha: 0.0,
            cbfgs_epsilon: 0.0,
            sy_epsilon: DEFAULT_SY_EPSILON,
            old_state: vec![0.0; problem_size],
            old_g: vec![0.0; problem_size],
            first_old: true,
        }
    }

    /// Update the default C-BFGS alpha
    pub fn with_cbfgs_alpha(mut self, alpha: f64) -> Self {
        assert!(alpha >= 0.0, "Negative alpha");

        self.cbfgs_alpha = alpha;
        self
    }

    /// Update the default C-BFGS epsilon
    pub fn with_cbfgs_epsilon(mut self, epsilon: f64) -> Self {
        assert!(epsilon >= 0.0);

        self.cbfgs_epsilon = epsilon;
        self
    }

    /// Update the default sy_epsilon
    pub fn with_sy_epsilon(mut self, sy_epsilon: f64) -> Self {
        assert!(sy_epsilon >= 0.0);

        self.sy_epsilon = sy_epsilon;
        self
    }

    /// "Empties" the buffer
    ///
    /// This is a cheap operation as it amount to setting certain internal flags
    pub fn reset(&mut self) {
        self.active_size = 0;
        self.first_old = true;
    }

    /// Apply the current Hessian estimate to an input vector
    pub fn apply_hessian(&mut self, g: &mut [f64]) {
        assert!(g.len() == self.old_g.len());

        if self.active_size == 0 {
            // No Hessian available, the g is the best we can do for now
            return;
        }

        let active_s = &self.s[0..self.active_size];
        let active_y = &self.y[0..self.active_size];
        let rho = &self.rho[0..self.active_size];
        let alpha = &mut self.alpha;

        let q = g;

        // Perform the forward L-BFGS algorithm
        for (s_k, (y_k, (rho_k, alpha_k))) in active_s
            .iter()
            .zip(active_y.iter().zip(rho.iter().zip(alpha.iter_mut())))
        {
            let a = rho_k * vec_ops::inner_product(s_k, q);

            *alpha_k = a;

            vec_ops::inplace_vec_add(q, y_k, -a);
        }

        // Apply the initial Hessian estimate and form r = H_0 * q, where H_0 = gamma * I
        vec_ops::scalar_mult(q, self.gamma);
        let r = q;

        // Perform the backward L-BFGS algorithm
        for (s_k, (y_k, (rho_k, alpha_k))) in active_s
            .iter()
            .zip(active_y.iter().zip(rho.iter().zip(alpha.iter())))
            .rev()
        {
            let beta = rho_k * vec_ops::inner_product(y_k, r);

            vec_ops::inplace_vec_add(r, s_k, alpha_k - beta);
        }

        // The g with the Hessian applied is available in the input g
        // r = H_k * grad f
    }

    /// Check the validity of the newly added s and y vectors. Based on the condition in:
    /// D.-H. Li and M. Fukushima, "On the global convergence of the BFGS method for nonconvex
    /// unconstrained optimization problems," vol. 11, no. 4, pp. 1054–1064, jan 2001.
    fn new_s_and_y_valid(&mut self, g: &[f64]) -> bool {
        let s = self.s.last().unwrap();
        let y = self.y.last().unwrap();
        let rho = self.rho.last_mut().unwrap();
        let ys = vec_ops::inner_product(s, y);
        let norm_s_squared = vec_ops::inner_product(s, s);

        *rho = 1.0 / ys;

        if norm_s_squared <= std::f64::MIN_POSITIVE
            || (self.sy_epsilon > 0.0 && ys <= self.sy_epsilon)
        {
            // In classic L-BFGS, the buffer should be updated only if
            // y'*s is strictly positive and |s| is nonzero
            false
        } else if self.cbfgs_epsilon > 0.0 && self.cbfgs_alpha > 0.0 {
            // Check the CBFGS condition of Li and Fukushima
            // Condition: (y^T * s) / ||s||^2 > epsilon * ||grad(x)||^alpha
            let lhs_cbfgs = ys / norm_s_squared;
            let rhs_cbfgs = self.cbfgs_epsilon * vec_ops::norm2(g).powf(self.cbfgs_alpha);

            lhs_cbfgs > rhs_cbfgs
        } else {
            // The standard L-BFGS conditions are satisfied and C-BFGS is
            // not active (either cbfgs_epsilon <= 0.0 or cbfgs_alpha <= 0.0)
            true
        }
    }

    /// Saves vectors to update the Hessian estimate
    pub fn update_hessian(&mut self, g: &[f64], state: &[f64]) -> UpdateStatus {
        assert!(g.len() == self.old_state.len());
        assert!(state.len() == self.old_state.len());

        // First iteration, only save
        if self.first_old {
            self.first_old = false;

            self.old_state.copy_from_slice(state);
            self.old_g.copy_from_slice(g);

            return UpdateStatus::UpdateOk;
        }

        // Form the new s_k in the temporary area
        vec_ops::difference_and_save(self.s.last_mut().unwrap(), &state, &self.old_state);

        // Form the new y_k in the temporary area
        vec_ops::difference_and_save(self.y.last_mut().unwrap(), &g, &self.old_g);

        // Check that the s and y are valid to use
        if !self.new_s_and_y_valid(g) {
            return UpdateStatus::Rejection;
        }

        self.old_state.copy_from_slice(state);
        self.old_g.copy_from_slice(g);

        // Move the new s_0,  y_0 and rho_0 to the front
        self.s.rotate_right(1);
        self.y.rotate_right(1);
        self.rho.rotate_right(1);

        // Update the Hessian estimate
        self.gamma = (1.0 / self.rho[0]) / vec_ops::inner_product(&self.y[0], &self.y[0]);

        // Update the indexes and number of active, -1 comes from the temporary area used in
        // the end of s and y to check if they are valid
        self.active_size = (self.s.len() - 1).min(self.active_size + 1);

        UpdateStatus::UpdateOk
    }
}