pounce_algorithm/sqp/lbfgs.rs
1//! Limited-memory Powell-damped BFGS Hessian approximation for
2//! SQP (Nocedal-Wright §7.2; Byrd-Nocedal-Schnabel 1994; Powell
3//! 1978). Used when `SqpOptions::hessian = Lbfgs`.
4//!
5//! Maintains a fixed-length circular buffer of `(s, y)` pairs and,
6//! at each `as_triplet` query, reconstructs the dense Hessian B_k
7//! by replaying the rank-2 BFGS updates from a scaled-identity
8//! seed `B_0 = γ_k I` with `γ_k = (s_{last}·y_{last}) /
9//! (y_{last}·y_{last})` (Nocedal-Wright eq. 7.20 — the standard
10//! initial-scaling heuristic).
11//!
12//! Phase 5b commit 15 implements the dense materialization path
13//! (output is a full upper-triangular [`Triplet`] consumed by
14//! `pounce-qp`'s `SqpQpData`). A future commit can add a
15//! matrix-free product interface so the QP can avoid the
16//! `O(n²)` storage when `m_history ≪ n`.
17//!
18//! Powell damping is identical to [`crate::sqp::bfgs::DampedBfgs`]:
19//!
20//! ```text
21//! if s·y ≥ 0.2 · s·B s : θ = 1
22//! else : θ = 0.8 · s·B s / (s·B s − s·y)
23//! y_damp = θ y + (1 − θ) B s
24//! ```
25//!
26//! …but `B` here is the rebuilt one, accumulated within
27//! `materialize`, so the damping factor for each historical pair
28//! is computed at replay time against the running B (matching
29//! how a full BFGS would have evolved).
30
31use crate::sqp::qp_assembly::Triplet;
32use pounce_common::types::{Index, Number};
33use std::collections::VecDeque;
34
35/// Limited-memory Powell-damped BFGS, storing up to `m_history`
36/// most-recent `(s, y)` pairs.
37pub struct LBfgs {
38 n: usize,
39 m_history: usize,
40 /// Circular buffer of (s_k, y_k) pairs, oldest first.
41 pairs: VecDeque<(Vec<Number>, Vec<Number>)>,
42 /// Previous `x` and `∇L`; the next call to [`Self::update`]
43 /// builds the next `(s, y)` from these.
44 prev_x: Option<Vec<Number>>,
45 prev_grad_lag: Option<Vec<Number>>,
46 /// 1-based upper-triangle sparsity pattern (matches DampedBfgs).
47 h_irow: Vec<Index>,
48 h_jcol: Vec<Index>,
49}
50
51impl LBfgs {
52 /// `m_history` is the number of `(s, y)` pairs to keep
53 /// (Nocedal-Wright recommends 3–20; upstream IPOPT defaults to
54 /// 6). Must be ≥ 1; a value of 0 would degenerate to plain
55 /// identity which is rarely useful.
56 pub fn new(n: usize, m_history: usize) -> Self {
57 debug_assert!(m_history >= 1);
58 let nz = n * (n + 1) / 2;
59 let mut h_irow = Vec::with_capacity(nz);
60 let mut h_jcol = Vec::with_capacity(nz);
61 for i in 0..n {
62 for j in 0..=i {
63 h_irow.push((i + 1) as Index);
64 h_jcol.push((j + 1) as Index);
65 }
66 }
67 Self {
68 n,
69 m_history,
70 pairs: VecDeque::with_capacity(m_history),
71 prev_x: None,
72 prev_grad_lag: None,
73 h_irow,
74 h_jcol,
75 }
76 }
77
78 pub fn has_prev(&self) -> bool {
79 self.prev_x.is_some()
80 }
81
82 /// Record a new pair `(s, y) = (x_new − x_prev, ∇L_new −
83 /// ∇L_prev)` if a prior iterate exists. The first call merely
84 /// stores `(x_new, ∇L_new)`.
85 pub fn update(&mut self, x_new: &[Number], grad_lag_new: &[Number]) {
86 // Hard assert (PR #50 review S5): see BFGS::update.
87 assert_eq!(x_new.len(), self.n, "LBFGS::update: x_new.len() != n");
88 assert_eq!(
89 grad_lag_new.len(),
90 self.n,
91 "LBFGS::update: grad_lag_new.len() != n"
92 );
93
94 if let (Some(prev_x), Some(prev_grad_lag)) =
95 (self.prev_x.as_ref(), self.prev_grad_lag.as_ref())
96 {
97 let s: Vec<Number> = x_new
98 .iter()
99 .zip(prev_x.iter())
100 .map(|(a, b)| a - b)
101 .collect();
102 let y: Vec<Number> = grad_lag_new
103 .iter()
104 .zip(prev_grad_lag.iter())
105 .map(|(a, b)| a - b)
106 .collect();
107 // Skip degenerate pairs (s ≈ 0).
108 let s_norm2: Number = s.iter().map(|v| v * v).sum();
109 if s_norm2 > 1e-30 {
110 if self.pairs.len() == self.m_history {
111 self.pairs.pop_front();
112 }
113 self.pairs.push_back((s, y));
114 }
115 }
116
117 self.prev_x = Some(x_new.to_vec());
118 self.prev_grad_lag = Some(grad_lag_new.to_vec());
119 }
120
121 /// Materialize the current B_k as a dense `Triplet` over the
122 /// upper triangle. Always returns the full `n(n+1)/2` triplets
123 /// (the same fixed pattern across iterations, so the QP
124 /// solver's symbolic factorization stays valid).
125 pub fn as_triplet(&self) -> Triplet {
126 let b_dense = self.materialize();
127 let mut vals = Vec::with_capacity(self.h_irow.len());
128 for i in 0..self.n {
129 for j in 0..=i {
130 vals.push(b_dense[i * self.n + j]);
131 }
132 }
133 Triplet {
134 n_rows: self.n,
135 n_cols: self.n,
136 irow: self.h_irow.clone(),
137 jcol: self.h_jcol.clone(),
138 vals,
139 }
140 }
141
142 /// Build the dense `n×n` row-major B_k by seeding `B_0 = γI`
143 /// (Nocedal-Wright eq. 7.20) and replaying Powell-damped BFGS
144 /// updates for every stored pair. Returned values are
145 /// symmetric (only the lower triangle is read by `as_triplet`).
146 fn materialize(&self) -> Vec<Number> {
147 let n = self.n;
148 // Initial scaling γ: most-recent (s, y) ratio. Defaults to
149 // 1.0 when no pairs exist or sᵀy is tiny.
150 let gamma = self
151 .pairs
152 .back()
153 .and_then(|(s, y)| {
154 let sy: Number = s.iter().zip(y.iter()).map(|(a, b)| a * b).sum();
155 let yy: Number = y.iter().map(|v| v * v).sum();
156 if yy > 1e-30 && sy > 1e-30 {
157 Some(yy / sy)
158 } else {
159 None
160 }
161 })
162 .unwrap_or(1.0);
163 let mut b = vec![0.0_f64; n * n];
164 for i in 0..n {
165 b[i * n + i] = gamma;
166 }
167
168 for (s, y) in self.pairs.iter() {
169 // bs = B · s
170 let mut bs = vec![0.0_f64; n];
171 for i in 0..n {
172 let mut acc = 0.0_f64;
173 let row = &b[i * n..i * n + n];
174 for j in 0..n {
175 acc += row[j] * s[j];
176 }
177 bs[i] = acc;
178 }
179 let s_bs: Number = s.iter().zip(bs.iter()).map(|(a, b)| a * b).sum();
180 let s_y: Number = s.iter().zip(y.iter()).map(|(a, b)| a * b).sum();
181
182 let theta = if s_y >= 0.2 * s_bs {
183 1.0
184 } else if s_bs - s_y > 1e-14 {
185 0.8 * s_bs / (s_bs - s_y)
186 } else {
187 1.0
188 };
189 let y_damp: Vec<Number> = y
190 .iter()
191 .zip(bs.iter())
192 .map(|(yi, bsi)| theta * yi + (1.0 - theta) * bsi)
193 .collect();
194 let s_y_damp: Number = s.iter().zip(y_damp.iter()).map(|(a, b)| a * b).sum();
195
196 if s_bs > 1e-14 && s_y_damp > 1e-14 {
197 for i in 0..n {
198 for j in 0..n {
199 let delta = -(bs[i] * bs[j]) / s_bs + (y_damp[i] * y_damp[j]) / s_y_damp;
200 b[i * n + j] += delta;
201 }
202 }
203 }
204 }
205 b
206 }
207}
208
209#[cfg(test)]
210mod tests {
211 use super::*;
212
213 #[test]
214 fn lbfgs_seeds_identity_with_no_pairs() {
215 let lb = LBfgs::new(3, 5);
216 let t = lb.as_triplet();
217 // Diagonal should be 1.0, off-diagonals 0.0.
218 for k in 0..t.vals.len() {
219 let i = (t.irow[k] - 1) as usize;
220 let j = (t.jcol[k] - 1) as usize;
221 let expected = if i == j { 1.0 } else { 0.0 };
222 assert!(
223 (t.vals[k] - expected).abs() < 1e-15,
224 "B[{i},{j}] = {} but expected {expected}",
225 t.vals[k]
226 );
227 }
228 }
229
230 #[test]
231 fn lbfgs_first_update_only_records_pair() {
232 let mut lb = LBfgs::new(2, 3);
233 lb.update(&[0.0, 0.0], &[1.0, 1.0]);
234 assert!(lb.has_prev());
235 assert!(lb.pairs.is_empty());
236 // Without any pairs the matrix is still γI = I.
237 let t = lb.as_triplet();
238 let diag: Vec<_> = t
239 .vals
240 .iter()
241 .enumerate()
242 .filter(|(k, _)| t.irow[*k] == t.jcol[*k])
243 .map(|(_, v)| *v)
244 .collect();
245 assert!((diag[0] - 1.0).abs() < 1e-15);
246 assert!((diag[1] - 1.0).abs() < 1e-15);
247 }
248
249 #[test]
250 fn lbfgs_quadratic_recovers_exact_hessian_at_convergence() {
251 // For the quadratic f(x) = ½ xᵀ A x with A = diag(2, 4),
252 // ∇f = Ax, so along iterates x_k = x_{k-1} + s_{k-1}:
253 // y_{k-1} = A s_{k-1}. Pumping ≥ 2 linearly independent
254 // (s, y) pairs into L-BFGS must rebuild B_2 = A exactly
255 // (up to numerical roundoff) because the rank-2 corrections
256 // collapse onto A in 2-D.
257 let mut lb = LBfgs::new(2, 5);
258 // Pair 1: s = (1, 0), y = A·s = (2, 0).
259 lb.update(&[0.0, 0.0], &[0.0, 0.0]); // record start
260 lb.update(&[1.0, 0.0], &[2.0, 0.0]); // produces (s, y) #1
261 lb.update(&[1.0, 1.0], &[2.0, 4.0]); // produces (s, y) #2 with s=(0,1), y=(0,4)
262 let t = lb.as_triplet();
263 // B should equal A = diag(2, 4).
264 let mut b = [[0.0_f64; 2]; 2];
265 for k in 0..t.vals.len() {
266 let i = (t.irow[k] - 1) as usize;
267 let j = (t.jcol[k] - 1) as usize;
268 b[i][j] = t.vals[k];
269 if i != j {
270 b[j][i] = t.vals[k];
271 }
272 }
273 assert!((b[0][0] - 2.0).abs() < 1e-9, "B[0,0] = {}", b[0][0]);
274 assert!((b[1][1] - 4.0).abs() < 1e-9, "B[1,1] = {}", b[1][1]);
275 assert!(b[0][1].abs() < 1e-9, "B[0,1] = {}", b[0][1]);
276 }
277
278 #[test]
279 fn lbfgs_history_cap_drops_oldest() {
280 let mut lb = LBfgs::new(2, 2);
281 lb.update(&[0.0, 0.0], &[0.0, 0.0]);
282 lb.update(&[1.0, 0.0], &[1.0, 0.0]);
283 lb.update(&[2.0, 0.0], &[2.0, 0.0]);
284 lb.update(&[2.0, 1.0], &[2.0, 1.0]);
285 // m_history = 2 means only the most recent 2 pairs are
286 // retained.
287 assert_eq!(lb.pairs.len(), 2);
288 }
289}