1use crate::error::OptimizeError;
24use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1};
25
26pub trait HessianApproximation: Send + Sync {
30 fn update(&mut self, s: &[f64], y: &[f64]) -> Result<(), OptimizeError>;
36
37 fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError>;
39
40 fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError>;
42
43 fn to_dense(&self) -> Array2<f64>;
45
46 fn reset(&mut self);
48
49 fn dim(&self) -> usize;
51}
52
53pub struct FiniteDiffHessian<F>
61where
62 F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync,
63{
64 pub fun: F,
66 pub x: Vec<f64>,
68 pub step: f64,
70 hess: Array2<f64>,
72}
73
74impl<F> FiniteDiffHessian<F>
75where
76 F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync,
77{
78 pub fn new(fun: F, x0: &[f64], step: f64) -> Result<Self, OptimizeError> {
80 let x = x0.to_vec();
81 let hess = compute_fd_hessian(&fun, &x, step)?;
82 Ok(Self { fun, x, step, hess })
83 }
84
85 pub fn with_default_step(fun: F, x0: &[f64]) -> Result<Self, OptimizeError> {
87 Self::new(fun, x0, f64::EPSILON.cbrt())
88 }
89
90 pub fn recompute_at(&mut self, x: &[f64]) -> Result<(), OptimizeError> {
92 self.x = x.to_vec();
93 self.hess = compute_fd_hessian(&self.fun, &self.x, self.step)?;
94 Ok(())
95 }
96}
97
98impl<F> HessianApproximation for FiniteDiffHessian<F>
99where
100 F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync,
101{
102 fn update(&mut self, _s: &[f64], _y: &[f64]) -> Result<(), OptimizeError> {
103 let n = self.x.len();
105 let mut x_new = self.x.clone();
106 for i in 0..n {
107 x_new[i] += _s[i];
108 }
109 self.recompute_at(&x_new)
110 }
111
112 fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
113 let n = self.hess.nrows();
114 if v.len() != n {
115 return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
116 }
117 let mut result = vec![0.0f64; n];
118 for i in 0..n {
119 for j in 0..n {
120 result[i] += self.hess[[i, j]] * v[j];
121 }
122 }
123 Ok(result)
124 }
125
126 fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
127 let n = self.hess.nrows();
128 if v.len() != n {
129 return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
130 }
131 let mut a: Vec<Vec<f64>> = (0..n).map(|i| self.hess.row(i).to_vec()).collect();
133 let mut b = v.to_vec();
134 gaussian_solve(&mut a, &mut b).ok_or_else(|| {
135 OptimizeError::ComputationError("Hessian is singular; cannot invert".to_string())
136 })
137 }
138
139 fn to_dense(&self) -> Array2<f64> {
140 self.hess.clone()
141 }
142
143 fn reset(&mut self) {
144 let n = self.x.len();
145 self.hess = Array2::eye(n);
146 }
147
148 fn dim(&self) -> usize {
149 self.x.len()
150 }
151}
152
153pub struct SR1Update {
167 pub b: Array2<f64>,
169 pub n: usize,
171 pub r: f64,
173}
174
175impl SR1Update {
176 pub fn new(n: usize) -> Self {
178 Self {
179 b: Array2::eye(n),
180 n,
181 r: 1e-8,
182 }
183 }
184
185 pub fn with_safeguard(n: usize, r: f64) -> Self {
187 Self {
188 b: Array2::eye(n),
189 n,
190 r,
191 }
192 }
193}
194
195impl HessianApproximation for SR1Update {
196 fn update(&mut self, s: &[f64], y: &[f64]) -> Result<(), OptimizeError> {
197 let n = self.n;
198 if s.len() != n || y.len() != n {
199 return Err(OptimizeError::ValueError(
200 "Dimension mismatch in SR1".to_string(),
201 ));
202 }
203
204 let bs: Vec<f64> = (0..n)
206 .map(|i| (0..n).map(|j| self.b[[i, j]] * s[j]).sum())
207 .collect();
208 let u: Vec<f64> = (0..n).map(|i| y[i] - bs[i]).collect();
209
210 let uts: f64 = (0..n).map(|i| u[i] * s[i]).sum();
211 let u_norm: f64 = (0..n).map(|i| u[i] * u[i]).sum::<f64>().sqrt();
212 let s_norm: f64 = (0..n).map(|i| s[i] * s[i]).sum::<f64>().sqrt();
213
214 if uts.abs() <= self.r * u_norm * s_norm {
216 return Ok(());
217 }
218
219 for i in 0..n {
221 for j in 0..n {
222 self.b[[i, j]] += u[i] * u[j] / uts;
223 }
224 }
225
226 Ok(())
227 }
228
229 fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
230 let n = self.n;
231 if v.len() != n {
232 return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
233 }
234 let mut result = vec![0.0f64; n];
235 for i in 0..n {
236 for j in 0..n {
237 result[i] += self.b[[i, j]] * v[j];
238 }
239 }
240 Ok(result)
241 }
242
243 fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
244 let n = self.n;
245 if v.len() != n {
246 return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
247 }
248 let mut a: Vec<Vec<f64>> = (0..n).map(|i| self.b.row(i).to_vec()).collect();
249 let mut b = v.to_vec();
250 gaussian_solve(&mut a, &mut b)
251 .ok_or_else(|| OptimizeError::ComputationError("SR1 Hessian singular".to_string()))
252 }
253
254 fn to_dense(&self) -> Array2<f64> {
255 self.b.clone()
256 }
257
258 fn reset(&mut self) {
259 self.b = Array2::eye(self.n);
260 }
261
262 fn dim(&self) -> usize {
263 self.n
264 }
265}
266
267pub struct BFGSUpdate {
281 pub b: Array2<f64>,
283 pub h_inv: Array2<f64>,
285 pub n: usize,
287}
288
289impl BFGSUpdate {
290 pub fn new(n: usize) -> Self {
292 Self {
293 b: Array2::eye(n),
294 h_inv: Array2::eye(n),
295 n,
296 }
297 }
298}
299
300impl HessianApproximation for BFGSUpdate {
301 fn update(&mut self, s: &[f64], y: &[f64]) -> Result<(), OptimizeError> {
302 let n = self.n;
303 if s.len() != n || y.len() != n {
304 return Err(OptimizeError::ValueError(
305 "Dimension mismatch in BFGS".to_string(),
306 ));
307 }
308
309 let sy: f64 = (0..n).map(|i| s[i] * y[i]).sum();
310 if sy <= 1e-10 * (0..n).map(|i| y[i] * y[i]).sum::<f64>().sqrt() {
311 return Ok(());
313 }
314
315 let rho = 1.0 / sy;
316
317 let bs: Vec<f64> = (0..n)
320 .map(|i| (0..n).map(|j| self.b[[i, j]] * s[j]).sum())
321 .collect();
322 let s_bs: f64 = (0..n).map(|i| s[i] * bs[i]).sum();
323
324 for i in 0..n {
325 for j in 0..n {
326 self.b[[i, j]] += rho * y[i] * y[j] - bs[i] * bs[j] / s_bs.max(1e-300);
327 }
328 }
329
330 let mut h_new = Array2::zeros((n, n));
333 for i in 0..n {
334 for j in 0..n {
335 let mut val = 0.0f64;
336 for k in 0..n {
337 for l in 0..n {
338 let li = if i == k { 1.0 } else { 0.0 } - rho * s[i] * y[k];
339 let rj = if j == l { 1.0 } else { 0.0 } - rho * y[l] * s[j];
340 val += li * self.h_inv[[k, l]] * rj;
341 }
342 }
343 h_new[[i, j]] = val + rho * s[i] * s[j];
344 }
345 }
346 self.h_inv = h_new;
347
348 Ok(())
349 }
350
351 fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
352 let n = self.n;
353 if v.len() != n {
354 return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
355 }
356 let mut result = vec![0.0f64; n];
357 for i in 0..n {
358 for j in 0..n {
359 result[i] += self.b[[i, j]] * v[j];
360 }
361 }
362 Ok(result)
363 }
364
365 fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
366 let n = self.n;
367 if v.len() != n {
368 return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
369 }
370 let mut result = vec![0.0f64; n];
371 for i in 0..n {
372 for j in 0..n {
373 result[i] += self.h_inv[[i, j]] * v[j];
374 }
375 }
376 Ok(result)
377 }
378
379 fn to_dense(&self) -> Array2<f64> {
380 self.b.clone()
381 }
382
383 fn reset(&mut self) {
384 self.b = Array2::eye(self.n);
385 self.h_inv = Array2::eye(self.n);
386 }
387
388 fn dim(&self) -> usize {
389 self.n
390 }
391}
392
393pub struct DFP {
404 pub c: Array2<f64>,
406 pub n: usize,
408}
409
410impl DFP {
411 pub fn new(n: usize) -> Self {
413 Self {
414 c: Array2::eye(n),
415 n,
416 }
417 }
418}
419
420impl HessianApproximation for DFP {
421 fn update(&mut self, s: &[f64], y: &[f64]) -> Result<(), OptimizeError> {
422 let n = self.n;
423 if s.len() != n || y.len() != n {
424 return Err(OptimizeError::ValueError(
425 "Dimension mismatch in DFP".to_string(),
426 ));
427 }
428
429 let sy: f64 = (0..n).map(|i| s[i] * y[i]).sum();
430 if sy <= 1e-10 {
431 return Ok(());
432 }
433
434 let cy: Vec<f64> = (0..n)
436 .map(|i| (0..n).map(|j| self.c[[i, j]] * y[j]).sum())
437 .collect();
438 let y_cy: f64 = (0..n).map(|i| y[i] * cy[i]).sum();
439
440 for i in 0..n {
442 for j in 0..n {
443 self.c[[i, j]] += s[i] * s[j] / sy - cy[i] * cy[j] / y_cy.max(1e-300);
444 }
445 }
446
447 Ok(())
448 }
449
450 fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
451 let n = self.n;
452 if v.len() != n {
453 return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
454 }
455 let mut a: Vec<Vec<f64>> = (0..n).map(|i| self.c.row(i).to_vec()).collect();
457 let mut b = v.to_vec();
458 gaussian_solve(&mut a, &mut b)
459 .ok_or_else(|| OptimizeError::ComputationError("DFP inverse singular".to_string()))
460 }
461
462 fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
463 let n = self.n;
464 if v.len() != n {
465 return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
466 }
467 let mut result = vec![0.0f64; n];
468 for i in 0..n {
469 for j in 0..n {
470 result[i] += self.c[[i, j]] * v[j];
471 }
472 }
473 Ok(result)
474 }
475
476 fn to_dense(&self) -> Array2<f64> {
477 self.c.clone()
478 }
479
480 fn reset(&mut self) {
481 self.c = Array2::eye(self.n);
482 }
483
484 fn dim(&self) -> usize {
485 self.n
486 }
487}
488
489fn compute_fd_hessian<F>(fun: &F, x: &[f64], step: f64) -> Result<Array2<f64>, OptimizeError>
493where
494 F: Fn(&ArrayView1<f64>) -> f64,
495{
496 let n = x.len();
497 let mut hess = Array2::<f64>::zeros((n, n));
498 let mut x_tmp = Array1::from(x.to_vec());
499
500 let f0 = fun(&x_tmp.view());
501
502 for i in 0..n {
503 let hi = step * (1.0 + x[i].abs());
504
505 x_tmp[i] = x[i] + hi;
506 let fp = fun(&x_tmp.view());
507 x_tmp[i] = x[i] - hi;
508 let fm = fun(&x_tmp.view());
509 x_tmp[i] = x[i];
510
511 if !fp.is_finite() || !fm.is_finite() {
512 return Err(OptimizeError::ComputationError(
513 "Non-finite value computing Hessian diagonal".to_string(),
514 ));
515 }
516 hess[[i, i]] = (fp - 2.0 * f0 + fm) / (hi * hi);
517
518 for j in (i + 1)..n {
519 let hj = step * (1.0 + x[j].abs());
520
521 x_tmp[i] = x[i] + hi;
522 x_tmp[j] = x[j] + hj;
523 let fpp = fun(&x_tmp.view());
524
525 x_tmp[i] = x[i] + hi;
526 x_tmp[j] = x[j] - hj;
527 let fpm = fun(&x_tmp.view());
528
529 x_tmp[i] = x[i] - hi;
530 x_tmp[j] = x[j] + hj;
531 let fmp = fun(&x_tmp.view());
532
533 x_tmp[i] = x[i] - hi;
534 x_tmp[j] = x[j] - hj;
535 let fmm = fun(&x_tmp.view());
536
537 x_tmp[i] = x[i];
538 x_tmp[j] = x[j];
539
540 let val = (fpp - fpm - fmp + fmm) / (4.0 * hi * hj);
541 hess[[i, j]] = val;
542 hess[[j, i]] = val;
543 }
544 }
545
546 Ok(hess)
547}
548
549fn gaussian_solve(a: &mut Vec<Vec<f64>>, b: &mut Vec<f64>) -> Option<Vec<f64>> {
551 let n = b.len();
552
553 for col in 0..n {
554 let mut max_row = col;
556 let mut max_val = a[col][col].abs();
557 for row in (col + 1)..n {
558 let v = a[row][col].abs();
559 if v > max_val {
560 max_val = v;
561 max_row = row;
562 }
563 }
564 if max_val < 1e-14 {
565 return None;
566 }
567 a.swap(col, max_row);
568 b.swap(col, max_row);
569
570 let pivot = a[col][col];
571 for row in (col + 1)..n {
572 let factor = a[row][col] / pivot;
573 b[row] -= factor * b[col];
574 for k in col..n {
575 let v = a[col][k];
576 a[row][k] -= factor * v;
577 }
578 }
579 }
580
581 let mut x = vec![0.0f64; n];
583 for i in (0..n).rev() {
584 let mut sum = b[i];
585 for j in (i + 1)..n {
586 sum -= a[i][j] * x[j];
587 }
588 if a[i][i].abs() < 1e-14 {
589 return None;
590 }
591 x[i] = sum / a[i][i];
592 }
593 Some(x)
594}
595
596#[cfg(test)]
599mod tests {
600 use super::*;
601 use approx::assert_abs_diff_eq;
602
603 fn quadratic(x: &ArrayView1<f64>) -> f64 {
604 x[0] * x[0] + 4.0 * x[1] * x[1]
605 }
606
607 #[test]
608 fn test_bfgs_identity_update() {
609 let mut bfgs = BFGSUpdate::new(2);
610 let s = vec![-1.0, -1.0];
613 let y = vec![-2.0, -8.0];
614 bfgs.update(&s, &y).expect("BFGS update failed");
615
616 let v = vec![1.0, 0.0];
617 let hv = bfgs.multiply(&v).expect("multiply failed");
618 assert!(hv[0] > 0.0); }
621
622 #[test]
623 fn test_sr1_update() {
624 let mut sr1 = SR1Update::new(2);
625 let s = vec![1.0, 0.0];
626 let y = vec![2.0, 0.0]; sr1.update(&s, &y).expect("SR1 update failed");
628 let hv = sr1.multiply(&vec![1.0, 0.0]).expect("multiply failed");
629 assert_abs_diff_eq!(hv[0], 2.0, epsilon = 1e-8);
630 }
631
632 #[test]
633 fn test_dfp_inverse_multiply() {
634 let mut dfp = DFP::new(2);
635 let s = vec![1.0, 0.0];
636 let y = vec![2.0, 0.0];
637 dfp.update(&s, &y).expect("DFP update failed");
638 let sv = dfp.inverse_multiply(&y).expect("inverse multiply failed");
640 assert!(sv[0] > 0.0);
642 }
643
644 #[test]
645 fn test_fd_hessian_diagonal() {
646 let hess = compute_fd_hessian(&quadratic, &[0.0, 0.0], 1e-5).expect("FD Hessian failed");
648 assert_abs_diff_eq!(hess[[0, 0]], 2.0, epsilon = 1e-4);
649 assert_abs_diff_eq!(hess[[1, 1]], 8.0, epsilon = 1e-4);
650 assert_abs_diff_eq!(hess[[0, 1]], 0.0, epsilon = 1e-4);
651 }
652
653 #[test]
654 fn test_gaussian_solve() {
655 let mut a = vec![vec![2.0, 1.0], vec![1.0, 3.0]];
656 let mut b = vec![5.0, 7.0];
657 let x = gaussian_solve(&mut a, &mut b).expect("solve failed");
658 assert_abs_diff_eq!(x[0], 1.6, epsilon = 1e-10);
660 assert_abs_diff_eq!(x[1], 1.8, epsilon = 1e-10);
661 }
662
663 #[test]
664 fn test_bfgs_reset() {
665 let mut bfgs = BFGSUpdate::new(3);
666 bfgs.update(&[1.0, 0.0, 0.0], &[2.0, 0.0, 0.0])
667 .expect("update failed");
668 bfgs.reset();
669 let v = vec![1.0, 2.0, 3.0];
670 let hv = bfgs.multiply(&v).expect("multiply failed");
671 assert_abs_diff_eq!(hv[0], 1.0, epsilon = 1e-12);
673 assert_abs_diff_eq!(hv[1], 2.0, epsilon = 1e-12);
674 assert_abs_diff_eq!(hv[2], 3.0, epsilon = 1e-12);
675 }
676}