1use numra_core::Scalar;
11
12use crate::error::OptimError;
13use crate::lbfgs::two_loop_recursion;
14use crate::types::{IterationRecord, OptimOptions, OptimResult, OptimStatus};
15use std::collections::VecDeque;
16
17#[derive(Clone, Debug)]
19pub struct LbfgsBOptions<S: Scalar> {
20 pub base: OptimOptions<S>,
21 pub memory: usize,
22}
23
24impl<S: Scalar> Default for LbfgsBOptions<S> {
25 fn default() -> Self {
26 Self {
27 base: OptimOptions::default(),
28 memory: 10,
29 }
30 }
31}
32
33impl<S: Scalar> LbfgsBOptions<S> {
34 pub fn memory(mut self, m: usize) -> Self {
35 self.memory = m;
36 self
37 }
38 pub fn max_iter(mut self, n: usize) -> Self {
39 self.base.max_iter = n;
40 self
41 }
42 pub fn gtol(mut self, tol: S) -> Self {
43 self.base.gtol = tol;
44 self
45 }
46}
47
48pub fn project<S: Scalar>(x: &mut [S], bounds: &[Option<(S, S)>]) {
50 for (xi, bi) in x.iter_mut().zip(bounds.iter()) {
51 if let Some((lo, hi)) = bi {
52 *xi = xi.clamp(*lo, *hi);
53 }
54 }
55}
56
57pub fn projected_gradient_norm<S: Scalar>(x: &[S], g: &[S], bounds: &[Option<(S, S)>]) -> S {
59 let mut norm = S::ZERO;
60 for i in 0..x.len() {
61 let mut xi_step = x[i] - g[i];
62 if let Some((lo, hi)) = bounds[i] {
63 xi_step = xi_step.clamp(lo, hi);
64 }
65 norm = norm.max((xi_step - x[i]).abs());
66 }
67 norm
68}
69
70pub fn lbfgsb_minimize<S: Scalar, F, G>(
75 f: F,
76 grad: G,
77 x0: &[S],
78 bounds: &[Option<(S, S)>],
79 opts: &LbfgsBOptions<S>,
80) -> Result<OptimResult<S>, OptimError>
81where
82 F: Fn(&[S]) -> S,
83 G: Fn(&[S], &mut [S]),
84{
85 let start = std::time::Instant::now();
86 let n = x0.len();
87 if n != bounds.len() {
88 return Err(OptimError::DimensionMismatch {
89 expected: n,
90 actual: bounds.len(),
91 });
92 }
93
94 let m = opts.memory;
95 let mut x = x0.to_vec();
96 project(&mut x, bounds);
97
98 let mut g_buf = vec![S::ZERO; n];
99 let mut fval = f(&x);
100 grad(&x, &mut g_buf);
101 let mut n_feval = 1_usize;
102 let mut n_geval = 1_usize;
103
104 let mut s_hist: VecDeque<Vec<S>> = VecDeque::with_capacity(m);
105 let mut y_hist: VecDeque<Vec<S>> = VecDeque::with_capacity(m);
106 let mut rho_hist: VecDeque<S> = VecDeque::with_capacity(m);
107
108 let c1 = S::from_f64(1e-4);
109 let mut f_prev;
110 let mut history = Vec::new();
111
112 for iter in 0..opts.base.max_iter {
113 f_prev = fval;
114 let pg_norm = projected_gradient_norm(&x, &g_buf, bounds);
115 if pg_norm < opts.base.gtol {
116 let active = active_bounds_at(&x, bounds);
117 return Ok((OptimResult {
118 active_bounds: active,
119 history,
120 ..OptimResult::unconstrained(
121 x,
122 fval,
123 g_buf,
124 iter,
125 n_feval,
126 n_geval,
127 true,
128 format!(
129 "Converged: projected gradient norm {:.2e}",
130 pg_norm.to_f64()
131 ),
132 OptimStatus::GradientConverged,
133 )
134 })
135 .with_wall_time(start));
136 }
137
138 let d = two_loop_recursion::<S>(&g_buf, &s_hist, &y_hist, &rho_hist);
140
141 let mut alpha = S::ONE;
144 let mut x_new = vec![S::ZERO; n];
145 let mut f_new;
146 let mut accepted = false;
147
148 for _ls in 0..30 {
149 for i in 0..n {
150 x_new[i] = x[i] + alpha * d[i];
151 }
152 project(&mut x_new, bounds);
153
154 f_new = f(&x_new);
155 n_feval += 1;
156
157 let dg: S = g_buf
159 .iter()
160 .zip(x_new.iter().zip(x.iter()))
161 .map(|(gi, (xn, xo))| *gi * (*xn - *xo))
162 .sum();
163
164 if f_new <= fval + c1 * dg {
165 accepted = true;
166 fval = f_new;
167 break;
168 }
169
170 alpha *= S::from_f64(0.5);
171 }
172
173 if !accepted {
174 alpha = S::ONE;
176 for _ls in 0..30 {
177 for i in 0..n {
178 x_new[i] = x[i] - alpha * g_buf[i];
179 }
180 project(&mut x_new, bounds);
181
182 f_new = f(&x_new);
183 n_feval += 1;
184
185 let dg: S = g_buf
186 .iter()
187 .zip(x_new.iter().zip(x.iter()))
188 .map(|(gi, (xn, xo))| *gi * (*xn - *xo))
189 .sum();
190
191 if f_new <= fval + c1 * dg {
192 fval = f_new;
193 accepted = true;
194 break;
195 }
196 alpha *= S::from_f64(0.5);
197 }
198
199 if !accepted {
200 fval = f(&x_new);
202 n_feval += 1;
203 }
204 }
205
206 let s: Vec<S> = x_new.iter().zip(x.iter()).map(|(a, b)| *a - *b).collect();
208 let s_norm: S = s.iter().copied().map(|si| si * si).sum::<S>().sqrt();
209
210 history.push(IterationRecord {
211 iteration: iter,
212 objective: fval,
213 gradient_norm: pg_norm,
214 step_size: alpha,
215 constraint_violation: S::ZERO,
216 });
217
218 let x_norm: S = x.iter().copied().map(|xi| xi * xi).sum::<S>().sqrt();
220 if s_norm < opts.base.xtol * (S::ONE + x_norm) {
221 let active = active_bounds_at(&x_new, bounds);
222 grad(&x_new, &mut g_buf);
223 n_geval += 1;
224 return Ok((OptimResult {
225 active_bounds: active,
226 history,
227 ..OptimResult::unconstrained(
228 x_new,
229 fval,
230 g_buf,
231 iter + 1,
232 n_feval,
233 n_geval,
234 true,
235 "step size below tolerance".into(),
236 OptimStatus::StepConverged,
237 )
238 })
239 .with_wall_time(start));
240 }
241
242 let f_change = (f_prev - fval).abs();
244 if f_change < opts.base.ftol * (S::ONE + f_prev.abs()) && iter > 0 {
245 grad(&x_new, &mut g_buf);
246 n_geval += 1;
247 let active = active_bounds_at(&x_new, bounds);
248 return Ok((OptimResult {
249 active_bounds: active,
250 history,
251 ..OptimResult::unconstrained(
252 x_new,
253 fval,
254 g_buf,
255 iter + 1,
256 n_feval,
257 n_geval,
258 true,
259 "function change below tolerance".into(),
260 OptimStatus::FunctionConverged,
261 )
262 })
263 .with_wall_time(start));
264 }
265
266 x = x_new;
267
268 let mut g_new = vec![S::ZERO; n];
270 grad(&x, &mut g_new);
271 n_geval += 1;
272
273 let y: Vec<S> = g_new
275 .iter()
276 .zip(g_buf.iter())
277 .map(|(gn, go)| *gn - *go)
278 .collect();
279 let sy: S = s.iter().zip(y.iter()).map(|(si, yi)| *si * *yi).sum();
280
281 if sy > S::from_f64(1e-16) {
282 if s_hist.len() == m {
283 s_hist.pop_front();
284 y_hist.pop_front();
285 rho_hist.pop_front();
286 }
287 rho_hist.push_back(S::ONE / sy);
288 s_hist.push_back(s);
289 y_hist.push_back(y);
290 }
291
292 g_buf = g_new;
293 }
294
295 let active = active_bounds_at(&x, bounds);
296 Ok((OptimResult {
297 active_bounds: active,
298 history,
299 ..OptimResult::unconstrained(
300 x,
301 fval,
302 g_buf,
303 opts.base.max_iter,
304 n_feval,
305 n_geval,
306 false,
307 format!("Maximum iterations ({}) reached", opts.base.max_iter),
308 OptimStatus::MaxIterations,
309 )
310 })
311 .with_wall_time(start))
312}
313
314fn active_bounds_at<S: Scalar>(x: &[S], bounds: &[Option<(S, S)>]) -> Vec<usize> {
316 let tol = S::from_f64(1e-12);
317 let mut active = Vec::new();
318 for (i, bi) in bounds.iter().enumerate() {
319 if let Some((lo, hi)) = bi {
320 if (x[i] - *lo).abs() < tol || (x[i] - *hi).abs() < tol {
321 active.push(i);
322 }
323 }
324 }
325 active
326}
327
328#[cfg(test)]
329mod tests {
330 use super::*;
331
332 #[test]
333 fn test_project() {
334 let mut x = vec![-2.0, 5.0, 0.5];
335 let bounds = vec![Some((0.0, 1.0)), Some((0.0, 3.0)), None];
336 project(&mut x, &bounds);
337 assert_eq!(x, vec![0.0, 3.0, 0.5]);
338 }
339
340 #[test]
341 fn test_projected_gradient_norm() {
342 let x = [0.0, 1.0];
343 let g = [-1.0, 2.0];
344 let bounds = vec![Some((0.0, 1.0)), Some((0.0, 1.0))];
345 let pgn = projected_gradient_norm(&x, &g, &bounds);
346 assert!((pgn - 1.0).abs() < 1e-14);
347 }
348
349 #[test]
350 fn test_lbfgsb_quadratic_at_boundary() {
351 let f = |x: &[f64]| (x[0] - 2.0).powi(2) + (x[1] - 2.0).powi(2);
355 let g = |x: &[f64], grad: &mut [f64]| {
356 grad[0] = 2.0 * (x[0] - 2.0);
357 grad[1] = 2.0 * (x[1] - 2.0);
358 };
359 let bounds = vec![Some((0.0, 1.0)), Some((0.0, 3.0))];
360 let opts = LbfgsBOptions {
361 base: OptimOptions::default().gtol(1e-10),
362 memory: 5,
363 };
364
365 let result = lbfgsb_minimize(f, g, &[0.5, 0.5], &bounds, &opts).unwrap();
366 assert!(result.converged, "did not converge: {}", result.message);
367 assert!((result.x[0] - 1.0).abs() < 1e-6, "x0={}", result.x[0]);
368 assert!((result.x[1] - 2.0).abs() < 1e-6, "x1={}", result.x[1]);
369 assert!(result.active_bounds.contains(&0));
370 }
371
372 #[test]
373 fn test_lbfgsb_rosenbrock_bounded() {
374 let f = |x: &[f64]| (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2);
377 let g = |x: &[f64], grad: &mut [f64]| {
378 grad[0] = -2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0] * x[0]);
379 grad[1] = 200.0 * (x[1] - x[0] * x[0]);
380 };
381 let bounds = vec![Some((0.0, 2.0)), Some((0.0, 2.0))];
382 let opts = LbfgsBOptions {
383 base: OptimOptions::default().gtol(1e-6).max_iter(2000),
384 memory: 10,
385 };
386
387 let result = lbfgsb_minimize(f, g, &[0.1, 0.1], &bounds, &opts).unwrap();
388 assert!(result.converged, "did not converge: {}", result.message);
389 assert!(result.x[0] >= -1e-10 && result.x[0] <= 2.0 + 1e-10);
390 assert!(result.x[1] >= -1e-10 && result.x[1] <= 2.0 + 1e-10);
391 assert!((result.x[0] - 1.0).abs() < 1e-2, "x0={}", result.x[0]);
392 assert!((result.x[1] - 1.0).abs() < 1e-2, "x1={}", result.x[1]);
393 }
394
395 #[test]
396 fn test_lbfgsb_sphere_all_bounded() {
397 let f = |x: &[f64]| x.iter().map(|xi| xi * xi).sum::<f64>();
400 let g = |x: &[f64], grad: &mut [f64]| {
401 for i in 0..x.len() {
402 grad[i] = 2.0 * x[i];
403 }
404 };
405 let bounds: Vec<Option<(f64, f64)>> = vec![Some((1.0, 10.0)); 5];
406 let opts = LbfgsBOptions::default().gtol(1e-10);
407
408 let result = lbfgsb_minimize(f, g, &[5.0; 5], &bounds, &opts).unwrap();
409 assert!(result.converged, "did not converge: {}", result.message);
410 for &xi in &result.x {
411 assert!((xi - 1.0).abs() < 1e-6, "x_i={}", xi);
412 }
413 }
414}