1use numra_core::Scalar;
16
17use crate::error::OptimError;
18use crate::types::{IterationRecord, OptimOptions, OptimResult, OptimStatus};
19use numra_nonlinear::line_search::{wolfe_line_search, WolfeOptions};
20
21fn dot<S: Scalar>(a: &[S], b: &[S]) -> S {
23 a.iter().zip(b.iter()).map(|(ai, bi)| *ai * *bi).sum()
24}
25
26fn norm<S: Scalar>(a: &[S]) -> S {
28 dot(a, a).sqrt()
29}
30
31pub struct Bfgs<S: Scalar> {
45 pub options: OptimOptions<S>,
46}
47
48impl<S: Scalar> Bfgs<S> {
49 pub fn new(options: OptimOptions<S>) -> Self {
51 Self { options }
52 }
53
54 pub fn minimize<F, G>(&self, f: F, grad: G, x0: &[S]) -> Result<OptimResult<S>, OptimError>
58 where
59 F: Fn(&[S]) -> S,
60 G: Fn(&[S], &mut [S]),
61 {
62 bfgs_minimize(f, grad, x0, &self.options)
63 }
64}
65
66pub fn bfgs_minimize<S: Scalar, F, G>(
80 f: F,
81 grad: G,
82 x0: &[S],
83 opts: &OptimOptions<S>,
84) -> Result<OptimResult<S>, OptimError>
85where
86 F: Fn(&[S]) -> S,
87 G: Fn(&[S], &mut [S]),
88{
89 let start = std::time::Instant::now();
90 let n = x0.len();
91 if n == 0 {
92 return Err(OptimError::DimensionMismatch {
93 expected: 1,
94 actual: 0,
95 });
96 }
97
98 let mut x = x0.to_vec();
100 let mut f_val = f(&x);
101 let mut n_feval: usize = 1;
102 let mut n_geval: usize = 0;
103
104 let mut g = vec![S::ZERO; n];
106 grad(&x, &mut g);
107 n_geval += 1;
108
109 let g_norm = norm(&g);
111 if g_norm <= opts.gtol {
112 return Ok(OptimResult::unconstrained(
113 x,
114 f_val,
115 g,
116 0,
117 n_feval,
118 n_geval,
119 true,
120 "gradient norm below tolerance at start".into(),
121 OptimStatus::GradientConverged,
122 )
123 .with_wall_time(start));
124 }
125
126 let mut h_inv = vec![S::ZERO; n * n];
128 for i in 0..n {
129 h_inv[i * n + i] = S::ONE;
130 }
131
132 let mut history = Vec::new();
133
134 let wolfe_opts = WolfeOptions::default();
136
137 let mut d = vec![S::ZERO; n]; let mut g_new = vec![S::ZERO; n];
140 let mut s = vec![S::ZERO; n]; let mut y = vec![S::ZERO; n]; let mut hy = vec![S::ZERO; n]; for iter in 0..opts.max_iter {
145 for i in 0..n {
147 let mut sum = S::ZERO;
148 for j in 0..n {
149 sum += h_inv[i * n + j] * g[j];
150 }
151 d[i] = -sum;
152 }
153
154 let ls_result = wolfe_line_search(&f, &grad, &x, &d, f_val, &g, &wolfe_opts)?;
156 let alpha = ls_result.step;
157 let f_new = ls_result.f_new;
158 n_feval += ls_result.n_eval;
159
160 for i in 0..n {
162 s[i] = alpha * d[i];
163 x[i] += s[i];
164 }
165
166 grad(&x, &mut g_new);
168 n_geval += 1;
169
170 for i in 0..n {
172 y[i] = g_new[i] - g[i];
173 }
174
175 let g_new_norm = norm(&g_new);
177
178 history.push(IterationRecord {
179 iteration: iter,
180 objective: f_new,
181 gradient_norm: g_new_norm,
182 step_size: alpha,
183 constraint_violation: S::ZERO,
184 });
185
186 if g_new_norm <= opts.gtol {
187 g.copy_from_slice(&g_new);
188 return Ok((OptimResult {
189 history,
190 ..OptimResult::unconstrained(
191 x,
192 f_new,
193 g,
194 iter + 1,
195 n_feval,
196 n_geval,
197 true,
198 "gradient norm below tolerance".into(),
199 OptimStatus::GradientConverged,
200 )
201 })
202 .with_wall_time(start));
203 }
204
205 let f_change = (f_new - f_val).abs();
207 if f_change <= opts.ftol * (S::ONE + f_val.abs()) {
208 g.copy_from_slice(&g_new);
209 return Ok((OptimResult {
210 history,
211 ..OptimResult::unconstrained(
212 x,
213 f_new,
214 g,
215 iter + 1,
216 n_feval,
217 n_geval,
218 true,
219 "function change below tolerance".into(),
220 OptimStatus::FunctionConverged,
221 )
222 })
223 .with_wall_time(start));
224 }
225
226 let s_norm = norm(&s);
228 let x_norm = norm(&x);
229 if s_norm <= opts.xtol * (S::ONE + x_norm) {
230 g.copy_from_slice(&g_new);
231 return Ok((OptimResult {
232 history,
233 ..OptimResult::unconstrained(
234 x,
235 f_new,
236 g,
237 iter + 1,
238 n_feval,
239 n_geval,
240 true,
241 "step size below tolerance".into(),
242 OptimStatus::StepConverged,
243 )
244 })
245 .with_wall_time(start));
246 }
247
248 let sy = dot(&s, &y);
250 if sy > S::from_f64(1e-16) {
251 let rho = S::ONE / sy;
252
253 for i in 0..n {
255 let mut sum = S::ZERO;
256 for j in 0..n {
257 sum += h_inv[i * n + j] * y[j];
258 }
259 hy[i] = sum;
260 }
261
262 let yhy = dot(&y, &hy);
263
264 for i in 0..n {
268 for j in 0..n {
269 h_inv[i * n + j] +=
270 rho * ((S::ONE + rho * yhy) * s[i] * s[j] - hy[i] * s[j] - s[i] * hy[j]);
271 }
272 }
273 }
274
275 f_val = f_new;
277 g.copy_from_slice(&g_new);
278 }
279
280 Ok((OptimResult {
281 history,
282 ..OptimResult::unconstrained(
283 x,
284 f_val,
285 g,
286 opts.max_iter,
287 n_feval,
288 n_geval,
289 false,
290 "maximum iterations reached".into(),
291 OptimStatus::MaxIterations,
292 )
293 })
294 .with_wall_time(start))
295}
296
297#[cfg(test)]
298mod tests {
299 use super::*;
300 use approx::assert_abs_diff_eq;
301
302 #[test]
303 fn test_bfgs_quadratic() {
304 let f = |x: &[f64]| x[0] * x[0] + 4.0 * x[1] * x[1];
306 let grad = |x: &[f64], g: &mut [f64]| {
307 g[0] = 2.0 * x[0];
308 g[1] = 8.0 * x[1];
309 };
310
311 let x0 = [5.0, 3.0];
312 let opts = OptimOptions::default().gtol(1e-10);
313 let result = bfgs_minimize(f, grad, &x0, &opts).unwrap();
314
315 assert!(result.converged, "should converge: {}", result.message);
316 assert_abs_diff_eq!(result.x[0], 0.0, epsilon = 1e-8);
317 assert_abs_diff_eq!(result.x[1], 0.0, epsilon = 1e-8);
318 assert_abs_diff_eq!(result.f, 0.0, epsilon = 1e-14);
319 assert!(
320 result.wall_time_secs > 0.0,
321 "wall_time_secs should be positive"
322 );
323 }
324
325 #[test]
326 fn test_bfgs_rosenbrock() {
327 let f = |x: &[f64]| {
329 let a = 1.0 - x[0];
330 let b = x[1] - x[0] * x[0];
331 a * a + 100.0 * b * b
332 };
333 let grad = |x: &[f64], g: &mut [f64]| {
334 g[0] = -2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0] * x[0]);
335 g[1] = 200.0 * (x[1] - x[0] * x[0]);
336 };
337
338 let x0 = [-1.0, 1.0];
339 let opts = OptimOptions::default().gtol(1e-8).max_iter(2000);
340 let result = bfgs_minimize(f, grad, &x0, &opts).unwrap();
341
342 assert!(result.converged, "should converge: {}", result.message);
343 assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-5);
344 assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-5);
345 }
346
347 #[test]
348 fn test_bfgs_struct_api() {
349 let f = |x: &[f64]| x[0] * x[0] + x[1] * x[1];
351 let grad = |x: &[f64], g: &mut [f64]| {
352 g[0] = 2.0 * x[0];
353 g[1] = 2.0 * x[1];
354 };
355
356 let bfgs = Bfgs::new(OptimOptions::default().gtol(1e-10));
357 let result = bfgs.minimize(f, grad, &[3.0, 4.0]).unwrap();
358
359 assert!(result.converged);
360 assert_abs_diff_eq!(result.x[0], 0.0, epsilon = 1e-8);
361 assert_abs_diff_eq!(result.x[1], 0.0, epsilon = 1e-8);
362 assert!(result.iterations > 0);
363 assert!(result.n_feval > 0);
364 assert!(result.n_geval > 0);
365 }
366
367 #[test]
368 fn test_bfgs_history() {
369 let result = bfgs_minimize(
370 |x: &[f64]| x[0] * x[0] + x[1] * x[1],
371 |x: &[f64], g: &mut [f64]| {
372 g[0] = 2.0 * x[0];
373 g[1] = 2.0 * x[1];
374 },
375 &[5.0, 3.0],
376 &OptimOptions::default(),
377 )
378 .unwrap();
379 assert!(!result.history.is_empty(), "history should not be empty");
380 for w in result.history.windows(2) {
382 assert!(
383 w[1].objective <= w[0].objective + 1e-10,
384 "objective should decrease: {} -> {}",
385 w[0].objective,
386 w[1].objective
387 );
388 }
389 }
390
391 #[test]
392 fn test_bfgs_high_dim() {
393 let n = 20;
395 let f = |x: &[f64]| x.iter().copied().map(|xi| xi * xi).sum::<f64>();
396 let grad = |x: &[f64], g: &mut [f64]| {
397 for i in 0..x.len() {
398 g[i] = 2.0 * x[i];
399 }
400 };
401
402 let x0: Vec<f64> = (1..=n).map(|i| i as f64).collect();
403 let opts = OptimOptions::default().gtol(1e-10);
404 let result = bfgs_minimize(f, grad, &x0, &opts).unwrap();
405
406 assert!(result.converged, "should converge: {}", result.message);
407 for i in 0..n {
408 assert_abs_diff_eq!(result.x[i], 0.0, epsilon = 1e-6);
409 }
410 assert_abs_diff_eq!(result.f, 0.0, epsilon = 1e-10);
411 }
412}