Skip to main content

pounce_algorithm/sqp/
lbfgs.rs

1//! Limited-memory Powell-damped BFGS Hessian approximation for
2//! SQP (Nocedal-Wright §7.2; Byrd-Nocedal-Schnabel 1994; Powell
3//! 1978). Used when `SqpOptions::hessian = Lbfgs`.
4//!
5//! Maintains a fixed-length circular buffer of `(s, y)` pairs and,
6//! at each `as_triplet` query, reconstructs the dense Hessian B_k
7//! by replaying the rank-2 BFGS updates from a scaled-identity
8//! seed `B_0 = γ_k I` with `γ_k = (s_{last}·y_{last}) /
9//! (y_{last}·y_{last})` (Nocedal-Wright eq. 7.20 — the standard
10//! initial-scaling heuristic).
11//!
12//! Phase 5b commit 15 implements the dense materialization path
13//! (output is a full upper-triangular [`Triplet`] consumed by
14//! `pounce-qp`'s `SqpQpData`). A future commit can add a
15//! matrix-free product interface so the QP can avoid the
16//! `O(n²)` storage when `m_history ≪ n`.
17//!
18//! Powell damping is identical to [`crate::sqp::bfgs::DampedBfgs`]:
19//!
20//! ```text
21//!     if s·y ≥ 0.2 · s·B s :  θ = 1
22//!     else                  :  θ = 0.8 · s·B s / (s·B s − s·y)
23//!     y_damp = θ y + (1 − θ) B s
24//! ```
25//!
26//! …but `B` here is the rebuilt one, accumulated within
27//! `materialize`, so the damping factor for each historical pair
28//! is computed at replay time against the running B (matching
29//! how a full BFGS would have evolved).
30
31use crate::sqp::qp_assembly::Triplet;
32use pounce_common::types::{Index, Number};
33use std::collections::VecDeque;
34
35/// Limited-memory Powell-damped BFGS, storing up to `m_history`
36/// most-recent `(s, y)` pairs.
37pub struct LBfgs {
38    n: usize,
39    m_history: usize,
40    /// Circular buffer of (s_k, y_k) pairs, oldest first.
41    pairs: VecDeque<(Vec<Number>, Vec<Number>)>,
42    /// Previous `x` and `∇L`; the next call to [`Self::update`]
43    /// builds the next `(s, y)` from these.
44    prev_x: Option<Vec<Number>>,
45    prev_grad_lag: Option<Vec<Number>>,
46    /// 1-based upper-triangle sparsity pattern (matches DampedBfgs).
47    h_irow: Vec<Index>,
48    h_jcol: Vec<Index>,
49}
50
51impl LBfgs {
52    /// `m_history` is the number of `(s, y)` pairs to keep
53    /// (Nocedal-Wright recommends 3–20; upstream IPOPT defaults to
54    /// 6). Must be ≥ 1; a value of 0 would degenerate to plain
55    /// identity which is rarely useful.
56    pub fn new(n: usize, m_history: usize) -> Self {
57        debug_assert!(m_history >= 1);
58        let nz = n * (n + 1) / 2;
59        let mut h_irow = Vec::with_capacity(nz);
60        let mut h_jcol = Vec::with_capacity(nz);
61        for i in 0..n {
62            for j in 0..=i {
63                h_irow.push((i + 1) as Index);
64                h_jcol.push((j + 1) as Index);
65            }
66        }
67        Self {
68            n,
69            m_history,
70            pairs: VecDeque::with_capacity(m_history),
71            prev_x: None,
72            prev_grad_lag: None,
73            h_irow,
74            h_jcol,
75        }
76    }
77
78    pub fn has_prev(&self) -> bool {
79        self.prev_x.is_some()
80    }
81
82    /// Record a new pair `(s, y) = (x_new − x_prev, ∇L_new −
83    /// ∇L_prev)` if a prior iterate exists. The first call merely
84    /// stores `(x_new, ∇L_new)`.
85    pub fn update(&mut self, x_new: &[Number], grad_lag_new: &[Number]) {
86        // Hard assert (PR #50 review S5): see BFGS::update.
87        assert_eq!(x_new.len(), self.n, "LBFGS::update: x_new.len() != n");
88        assert_eq!(
89            grad_lag_new.len(),
90            self.n,
91            "LBFGS::update: grad_lag_new.len() != n"
92        );
93
94        if let (Some(prev_x), Some(prev_grad_lag)) =
95            (self.prev_x.as_ref(), self.prev_grad_lag.as_ref())
96        {
97            let s: Vec<Number> = x_new
98                .iter()
99                .zip(prev_x.iter())
100                .map(|(a, b)| a - b)
101                .collect();
102            let y: Vec<Number> = grad_lag_new
103                .iter()
104                .zip(prev_grad_lag.iter())
105                .map(|(a, b)| a - b)
106                .collect();
107            // Skip degenerate pairs (s ≈ 0).
108            let s_norm2: Number = s.iter().map(|v| v * v).sum();
109            if s_norm2 > 1e-30 {
110                if self.pairs.len() == self.m_history {
111                    self.pairs.pop_front();
112                }
113                self.pairs.push_back((s, y));
114            }
115        }
116
117        self.prev_x = Some(x_new.to_vec());
118        self.prev_grad_lag = Some(grad_lag_new.to_vec());
119    }
120
121    /// Materialize the current B_k as a dense `Triplet` over the
122    /// upper triangle. Always returns the full `n(n+1)/2` triplets
123    /// (the same fixed pattern across iterations, so the QP
124    /// solver's symbolic factorization stays valid).
125    pub fn as_triplet(&self) -> Triplet {
126        let b_dense = self.materialize();
127        let mut vals = Vec::with_capacity(self.h_irow.len());
128        for i in 0..self.n {
129            for j in 0..=i {
130                vals.push(b_dense[i * self.n + j]);
131            }
132        }
133        Triplet {
134            n_rows: self.n,
135            n_cols: self.n,
136            irow: self.h_irow.clone(),
137            jcol: self.h_jcol.clone(),
138            vals,
139        }
140    }
141
142    /// Build the dense `n×n` row-major B_k by seeding `B_0 = γI`
143    /// (Nocedal-Wright eq. 7.20) and replaying Powell-damped BFGS
144    /// updates for every stored pair. Returned values are
145    /// symmetric (only the lower triangle is read by `as_triplet`).
146    fn materialize(&self) -> Vec<Number> {
147        let n = self.n;
148        // Initial scaling γ: most-recent (s, y) ratio. Defaults to
149        // 1.0 when no pairs exist or sᵀy is tiny.
150        let gamma = self
151            .pairs
152            .back()
153            .and_then(|(s, y)| {
154                let sy: Number = s.iter().zip(y.iter()).map(|(a, b)| a * b).sum();
155                let yy: Number = y.iter().map(|v| v * v).sum();
156                if yy > 1e-30 && sy > 1e-30 {
157                    Some(yy / sy)
158                } else {
159                    None
160                }
161            })
162            .unwrap_or(1.0);
163        let mut b = vec![0.0_f64; n * n];
164        for i in 0..n {
165            b[i * n + i] = gamma;
166        }
167
168        for (s, y) in self.pairs.iter() {
169            // bs = B · s
170            let mut bs = vec![0.0_f64; n];
171            for i in 0..n {
172                let mut acc = 0.0_f64;
173                let row = &b[i * n..i * n + n];
174                for j in 0..n {
175                    acc += row[j] * s[j];
176                }
177                bs[i] = acc;
178            }
179            let s_bs: Number = s.iter().zip(bs.iter()).map(|(a, b)| a * b).sum();
180            let s_y: Number = s.iter().zip(y.iter()).map(|(a, b)| a * b).sum();
181
182            let theta = if s_y >= 0.2 * s_bs {
183                1.0
184            } else if s_bs - s_y > 1e-14 {
185                0.8 * s_bs / (s_bs - s_y)
186            } else {
187                1.0
188            };
189            let y_damp: Vec<Number> = y
190                .iter()
191                .zip(bs.iter())
192                .map(|(yi, bsi)| theta * yi + (1.0 - theta) * bsi)
193                .collect();
194            let s_y_damp: Number = s.iter().zip(y_damp.iter()).map(|(a, b)| a * b).sum();
195
196            if s_bs > 1e-14 && s_y_damp > 1e-14 {
197                for i in 0..n {
198                    for j in 0..n {
199                        let delta = -(bs[i] * bs[j]) / s_bs + (y_damp[i] * y_damp[j]) / s_y_damp;
200                        b[i * n + j] += delta;
201                    }
202                }
203            }
204        }
205        b
206    }
207}
208
209#[cfg(test)]
210mod tests {
211    use super::*;
212
213    #[test]
214    fn lbfgs_seeds_identity_with_no_pairs() {
215        let lb = LBfgs::new(3, 5);
216        let t = lb.as_triplet();
217        // Diagonal should be 1.0, off-diagonals 0.0.
218        for k in 0..t.vals.len() {
219            let i = (t.irow[k] - 1) as usize;
220            let j = (t.jcol[k] - 1) as usize;
221            let expected = if i == j { 1.0 } else { 0.0 };
222            assert!(
223                (t.vals[k] - expected).abs() < 1e-15,
224                "B[{i},{j}] = {} but expected {expected}",
225                t.vals[k]
226            );
227        }
228    }
229
230    #[test]
231    fn lbfgs_first_update_only_records_pair() {
232        let mut lb = LBfgs::new(2, 3);
233        lb.update(&[0.0, 0.0], &[1.0, 1.0]);
234        assert!(lb.has_prev());
235        assert!(lb.pairs.is_empty());
236        // Without any pairs the matrix is still γI = I.
237        let t = lb.as_triplet();
238        let diag: Vec<_> = t
239            .vals
240            .iter()
241            .enumerate()
242            .filter(|(k, _)| t.irow[*k] == t.jcol[*k])
243            .map(|(_, v)| *v)
244            .collect();
245        assert!((diag[0] - 1.0).abs() < 1e-15);
246        assert!((diag[1] - 1.0).abs() < 1e-15);
247    }
248
249    #[test]
250    fn lbfgs_quadratic_recovers_exact_hessian_at_convergence() {
251        // For the quadratic f(x) = ½ xᵀ A x with A = diag(2, 4),
252        // ∇f = Ax, so along iterates x_k = x_{k-1} + s_{k-1}:
253        // y_{k-1} = A s_{k-1}. Pumping ≥ 2 linearly independent
254        // (s, y) pairs into L-BFGS must rebuild B_2 = A exactly
255        // (up to numerical roundoff) because the rank-2 corrections
256        // collapse onto A in 2-D.
257        let mut lb = LBfgs::new(2, 5);
258        // Pair 1: s = (1, 0), y = A·s = (2, 0).
259        lb.update(&[0.0, 0.0], &[0.0, 0.0]); // record start
260        lb.update(&[1.0, 0.0], &[2.0, 0.0]); // produces (s, y) #1
261        lb.update(&[1.0, 1.0], &[2.0, 4.0]); // produces (s, y) #2 with s=(0,1), y=(0,4)
262        let t = lb.as_triplet();
263        // B should equal A = diag(2, 4).
264        let mut b = [[0.0_f64; 2]; 2];
265        for k in 0..t.vals.len() {
266            let i = (t.irow[k] - 1) as usize;
267            let j = (t.jcol[k] - 1) as usize;
268            b[i][j] = t.vals[k];
269            if i != j {
270                b[j][i] = t.vals[k];
271            }
272        }
273        assert!((b[0][0] - 2.0).abs() < 1e-9, "B[0,0] = {}", b[0][0]);
274        assert!((b[1][1] - 4.0).abs() < 1e-9, "B[1,1] = {}", b[1][1]);
275        assert!(b[0][1].abs() < 1e-9, "B[0,1] = {}", b[0][1]);
276    }
277
278    #[test]
279    fn lbfgs_history_cap_drops_oldest() {
280        let mut lb = LBfgs::new(2, 2);
281        lb.update(&[0.0, 0.0], &[0.0, 0.0]);
282        lb.update(&[1.0, 0.0], &[1.0, 0.0]);
283        lb.update(&[2.0, 0.0], &[2.0, 0.0]);
284        lb.update(&[2.0, 1.0], &[2.0, 1.0]);
285        // m_history = 2 means only the most recent 2 pairs are
286        // retained.
287        assert_eq!(lb.pairs.len(), 2);
288    }
289}