1use scivex_core::{Float, Tensor};
8
9use crate::error::Result;
10
11use super::{MinimizeOptions, MinimizeResult};
12
13#[cfg_attr(
24 feature = "serde-support",
25 derive(serde::Serialize, serde::Deserialize)
26)]
27#[derive(Debug, Clone)]
28pub struct Bounds<T> {
29 pub lower: Option<T>,
31 pub upper: Option<T>,
33}
34
35impl<T> Bounds<T> {
36 pub fn none() -> Self {
47 Self {
48 lower: None,
49 upper: None,
50 }
51 }
52
53 pub fn lower(lb: T) -> Self {
63 Self {
64 lower: Some(lb),
65 upper: None,
66 }
67 }
68
69 pub fn upper(ub: T) -> Self {
79 Self {
80 lower: None,
81 upper: Some(ub),
82 }
83 }
84
85 pub fn both(lb: T, ub: T) -> Self {
96 Self {
97 lower: Some(lb),
98 upper: Some(ub),
99 }
100 }
101}
102
103fn project<T: Float>(x: &mut [T], bounds: &[Bounds<T>]) {
105 for (xi, b) in x.iter_mut().zip(bounds.iter()) {
106 #[allow(clippy::collapsible_if)]
107 if let Some(lb) = b.lower {
108 if *xi < lb {
109 *xi = lb;
110 }
111 }
112 #[allow(clippy::collapsible_if)]
113 if let Some(ub) = b.upper {
114 if *xi > ub {
115 *xi = ub;
116 }
117 }
118 }
119}
120
121pub fn lbfgsb<T, F, G>(
142 f: F,
143 grad: G,
144 x0: &Tensor<T>,
145 bounds: &[Bounds<T>],
146 options: &MinimizeOptions<T>,
147) -> Result<MinimizeResult<T>>
148where
149 T: Float,
150 F: Fn(&Tensor<T>) -> T,
151 G: Fn(&Tensor<T>) -> Tensor<T>,
152{
153 let n = x0.numel();
154 let mut state = LbfgsbState::new(&f, &grad, x0, bounds, n);
155
156 for iter in 0..options.max_iter {
157 let pg_norm = projected_gradient_norm(&state.x, &state.g, bounds);
158 if pg_norm < options.gtol {
159 return Ok(state.into_result(iter, true));
160 }
161
162 let d = lbfgs_direction(&state.g, &state.s_hist, &state.y_hist, &state.rho_hist, n);
163 let p: Vec<T> = d.iter().map(|&v| -v).collect();
164
165 let (alpha, fx_new, ls_evals) = projected_line_search(
166 &f,
167 &LineSearchInput {
168 x: &state.x,
169 p: &p,
170 fx: state.fx,
171 g: &state.g,
172 bounds,
173 n,
174 },
175 );
176 state.f_evals += ls_evals;
177
178 let (step_norm, f_change) = state.step(&grad, &p, alpha, fx_new, bounds, n);
179
180 if step_norm < options.xtol
181 || (f_change < options.ftol && f_change < options.ftol * state.fx.abs().max(T::one()))
182 {
183 return Ok(state.into_result(iter + 1, true));
184 }
185 }
186
187 Ok(state.into_result(options.max_iter, false))
188}
189
190struct LbfgsbState<T: Float> {
191 x: Vec<T>,
192 x_tensor: Tensor<T>,
193 fx: T,
194 g: Vec<T>,
195 g_tensor: Tensor<T>,
196 f_evals: usize,
197 g_evals: usize,
198 s_hist: Vec<Vec<T>>,
199 y_hist: Vec<Vec<T>>,
200 rho_hist: Vec<T>,
201}
202
203impl<T: Float> LbfgsbState<T> {
204 fn new<F, G>(f: &F, grad: &G, x0: &Tensor<T>, bounds: &[Bounds<T>], n: usize) -> Self
205 where
206 F: Fn(&Tensor<T>) -> T,
207 G: Fn(&Tensor<T>) -> Tensor<T>,
208 {
209 let mut x = x0.as_slice().to_vec();
210 project(&mut x, bounds);
211 let x_tensor = Tensor::from_vec(x.clone(), vec![n]).expect("projected x0 length matches n");
212 let fx = f(&x_tensor);
213 let g_tensor = grad(&x_tensor);
214 let g = g_tensor.as_slice().to_vec();
215 Self {
216 x,
217 x_tensor,
218 fx,
219 g,
220 g_tensor,
221 f_evals: 1,
222 g_evals: 1,
223 s_hist: Vec::with_capacity(10),
224 y_hist: Vec::with_capacity(10),
225 rho_hist: Vec::with_capacity(10),
226 }
227 }
228
229 fn step<G: Fn(&Tensor<T>) -> Tensor<T>>(
230 &mut self,
231 grad: &G,
232 p: &[T],
233 alpha: T,
234 fx_new: T,
235 bounds: &[Bounds<T>],
236 n: usize,
237 ) -> (T, T) {
238 let mut x_new = vec![T::zero(); n];
239 for j in 0..n {
240 x_new[j] = self.x[j] + alpha * p[j];
241 }
242 project(&mut x_new, bounds);
243
244 let x_new_tensor =
245 Tensor::from_vec(x_new.clone(), vec![n]).expect("new x length matches n");
246 let g_new_tensor = grad(&x_new_tensor);
247 self.g_evals += 1;
248 let g_new = g_new_tensor.as_slice().to_vec();
249
250 let mut sy = T::zero();
251 let mut s_k = vec![T::zero(); n];
252 let mut y_k = vec![T::zero(); n];
253 for j in 0..n {
254 s_k[j] = x_new[j] - self.x[j];
255 y_k[j] = g_new[j] - self.g[j];
256 sy += s_k[j] * y_k[j];
257 }
258
259 let step_norm: T = s_k.iter().map(|&v| v * v).sum::<T>().sqrt();
260 let f_change = (self.fx - fx_new).abs();
261
262 if sy > T::epsilon() {
263 if self.s_hist.len() == 10 {
264 self.s_hist.remove(0);
265 self.y_hist.remove(0);
266 self.rho_hist.remove(0);
267 }
268 self.s_hist.push(s_k);
269 self.y_hist.push(y_k);
270 self.rho_hist.push(T::one() / sy);
271 }
272
273 self.x = x_new;
274 self.x_tensor = x_new_tensor;
275 self.fx = fx_new;
276 self.g = g_new;
277 self.g_tensor = g_new_tensor;
278
279 (step_norm, f_change)
280 }
281
282 fn into_result(self, iterations: usize, converged: bool) -> MinimizeResult<T> {
283 MinimizeResult {
284 x: self.x_tensor,
285 f_val: self.fx,
286 grad: Some(self.g_tensor),
287 iterations,
288 f_evals: self.f_evals,
289 g_evals: self.g_evals,
290 converged,
291 }
292 }
293}
294
295fn projected_gradient_norm<T: Float>(x: &[T], g: &[T], bounds: &[Bounds<T>]) -> T {
301 let mut norm_sq = T::zero();
302 for i in 0..x.len() {
303 let gi = g[i];
304 let at_lower = bounds[i]
305 .lower
306 .is_some_and(|lb| x[i] <= lb && gi > T::zero());
307 let at_upper = bounds[i]
308 .upper
309 .is_some_and(|ub| x[i] >= ub && gi < T::zero());
310 if !at_lower && !at_upper {
311 norm_sq += gi * gi;
312 }
313 }
314 norm_sq.sqrt()
315}
316
317fn lbfgs_direction<T: Float>(
319 g: &[T],
320 s_history: &[Vec<T>],
321 y_history: &[Vec<T>],
322 rho_history: &[T],
323 n: usize,
324) -> Vec<T> {
325 let k = s_history.len();
326 if k == 0 {
327 return g.to_vec();
329 }
330
331 let mut q = g.to_vec();
332 let mut alphas = vec![T::zero(); k];
333
334 for i in (0..k).rev() {
336 let mut si_q = T::zero();
337 for j in 0..n {
338 si_q += s_history[i][j] * q[j];
339 }
340 alphas[i] = rho_history[i] * si_q;
341 for j in 0..n {
342 q[j] -= alphas[i] * y_history[i][j];
343 }
344 }
345
346 let last = k - 1;
349 let mut sy = T::zero();
350 let mut yy = T::zero();
351 for j in 0..n {
352 sy += s_history[last][j] * y_history[last][j];
353 yy += y_history[last][j] * y_history[last][j];
354 }
355 let gamma = if yy > T::epsilon() { sy / yy } else { T::one() };
356
357 let mut r: Vec<T> = q.iter().map(|&v| gamma * v).collect();
358
359 for i in 0..k {
361 let mut yi_r = T::zero();
362 for j in 0..n {
363 yi_r += y_history[i][j] * r[j];
364 }
365 let beta = rho_history[i] * yi_r;
366 for j in 0..n {
367 r[j] += (alphas[i] - beta) * s_history[i][j];
368 }
369 }
370
371 r
372}
373
374struct LineSearchInput<'a, T> {
375 x: &'a [T],
376 p: &'a [T],
377 fx: T,
378 g: &'a [T],
379 bounds: &'a [Bounds<T>],
380 n: usize,
381}
382
383fn projected_line_search<T, F>(f: &F, input: &LineSearchInput<'_, T>) -> (T, T, usize)
385where
386 T: Float,
387 F: Fn(&Tensor<T>) -> T,
388{
389 let x = input.x;
390 let p = input.p;
391 let fx = input.fx;
392 let g = input.g;
393 let bounds = input.bounds;
394 let n = input.n;
395
396 let c1 = T::from_f64(1e-4);
397 let shrink = T::from_f64(0.5);
398
399 let dg: T = g.iter().zip(p.iter()).map(|(&gi, &pi)| gi * pi).sum();
400
401 let mut alpha = T::one();
402 let mut evals = 0usize;
403 let max_ls = 20usize;
404
405 for _ in 0..max_ls {
406 let mut x_trial = vec![T::zero(); n];
407 for j in 0..n {
408 x_trial[j] = x[j] + alpha * p[j];
409 }
410 project(&mut x_trial, bounds);
411
412 let t = Tensor::from_vec(x_trial, vec![n]).expect("trial point length matches n");
413 let f_trial = f(&t);
414 evals += 1;
415
416 if f_trial <= fx + c1 * alpha * dg {
417 return (alpha, f_trial, evals);
418 }
419 alpha *= shrink;
420 }
421
422 let mut x_trial = vec![T::zero(); n];
424 for j in 0..n {
425 x_trial[j] = x[j] + alpha * p[j];
426 }
427 project(&mut x_trial, bounds);
428 let t = Tensor::from_vec(x_trial, vec![n]).expect("trial point length matches n");
429 let f_trial = f(&t);
430 evals += 1;
431 (alpha, f_trial, evals)
432}
433
434#[cfg(test)]
435mod tests {
436 use super::*;
437
438 #[test]
439 fn test_lbfgsb_unconstrained_quadratic() {
440 let f = |x: &Tensor<f64>| {
441 let s = x.as_slice();
442 s[0] * s[0] + s[1] * s[1]
443 };
444 let grad = |x: &Tensor<f64>| {
445 let s = x.as_slice();
446 Tensor::from_vec(vec![2.0 * s[0], 2.0 * s[1]], vec![2]).unwrap()
447 };
448
449 let x0 = Tensor::from_vec(vec![5.0, 5.0], vec![2]).unwrap();
450 let bounds = vec![Bounds::none(), Bounds::none()];
451 let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
452 assert!(result.converged);
453 let s = result.x.as_slice();
454 assert!(s[0].abs() < 1e-6, "x = {}", s[0]);
455 assert!(s[1].abs() < 1e-6, "y = {}", s[1]);
456 }
457
458 #[test]
459 fn test_lbfgsb_active_bounds() {
460 let f = |x: &Tensor<f64>| {
463 let s = x.as_slice();
464 (s[0] - 3.0).powi(2) + (s[1] - 3.0).powi(2)
465 };
466 let grad = |x: &Tensor<f64>| {
467 let s = x.as_slice();
468 Tensor::from_vec(vec![2.0 * (s[0] - 3.0), 2.0 * (s[1] - 3.0)], vec![2]).unwrap()
469 };
470
471 let x0 = Tensor::from_vec(vec![1.0, 2.0], vec![2]).unwrap();
472 let bounds = vec![Bounds::lower(1.0), Bounds::lower(2.0)];
473 let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
474 assert!(result.converged);
475 let s = result.x.as_slice();
476 assert!((s[0] - 3.0).abs() < 1e-4, "x = {}", s[0]);
477 assert!((s[1] - 3.0).abs() < 1e-4, "y = {}", s[1]);
478 }
479
480 #[test]
481 fn test_lbfgsb_active_bound_at_solution() {
482 let f = |x: &Tensor<f64>| {
485 let s = x.as_slice();
486 (s[0] + 1.0).powi(2)
487 };
488 let grad = |x: &Tensor<f64>| {
489 let s = x.as_slice();
490 Tensor::from_vec(vec![2.0 * (s[0] + 1.0)], vec![1]).unwrap()
491 };
492
493 let x0 = Tensor::from_vec(vec![5.0], vec![1]).unwrap();
494 let bounds = vec![Bounds::lower(0.0)];
495 let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
496 assert!(result.converged);
497 let s = result.x.as_slice();
498 assert!(s[0].abs() < 1e-6, "x = {}, expected 0.0", s[0]);
499 }
500
501 #[test]
502 fn test_lbfgsb_all_bounded() {
503 let f = |x: &Tensor<f64>| {
506 let s = x.as_slice();
507 s[0] * s[0] + s[1] * s[1]
508 };
509 let grad = |x: &Tensor<f64>| {
510 let s = x.as_slice();
511 Tensor::from_vec(vec![2.0 * s[0], 2.0 * s[1]], vec![2]).unwrap()
512 };
513
514 let x0 = Tensor::from_vec(vec![1.5, 1.5], vec![2]).unwrap();
515 let bounds = vec![Bounds::both(1.0, 2.0), Bounds::both(1.0, 2.0)];
516 let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
517 assert!(result.converged);
518 let s = result.x.as_slice();
519 assert!((s[0] - 1.0).abs() < 1e-4, "x = {}", s[0]);
520 assert!((s[1] - 1.0).abs() < 1e-4, "y = {}", s[1]);
521 }
522}