1use super::types::{OptResult, Particle};
6use rand::RngExt;
7
8pub fn gradient_descent(
13 f: impl Fn(&[f64]) -> f64,
14 grad: impl Fn(&[f64]) -> Vec<f64>,
15 x0: Vec<f64>,
16 lr: f64,
17 tol: f64,
18 max_iter: u32,
19) -> OptResult {
20 let mut x = x0;
21 let mut converged = false;
22 let mut n_iter = 0u32;
23 for _ in 0..max_iter {
24 n_iter += 1;
25 let g = grad(&x);
26 let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
27 if gnorm < tol {
28 converged = true;
29 break;
30 }
31 for (xi, gi) in x.iter_mut().zip(g.iter()) {
32 *xi -= lr * gi;
33 }
34 }
35 let f_val = f(&x);
36 OptResult {
37 x,
38 f_val,
39 n_iter,
40 converged,
41 }
42}
43#[allow(clippy::too_many_arguments)]
50pub fn adam(
51 f: impl Fn(&[f64]) -> f64,
52 grad: impl Fn(&[f64]) -> Vec<f64>,
53 x0: Vec<f64>,
54 lr: f64,
55 beta1: f64,
56 beta2: f64,
57 eps: f64,
58 max_iter: u32,
59) -> OptResult {
60 let n = x0.len();
61 let mut x = x0;
62 let mut m = vec![0.0f64; n];
63 let mut v = vec![0.0f64; n];
64 let mut converged = false;
65 let mut n_iter = 0u32;
66 for t in 1..=max_iter {
67 n_iter = t;
68 let g = grad(&x);
69 let gnorm: f64 = g.iter().map(|gi| gi * gi).sum::<f64>().sqrt();
70 if gnorm < 1e-7 {
71 converged = true;
72 break;
73 }
74 let b1t = beta1.powi(t as i32);
75 let b2t = beta2.powi(t as i32);
76 for i in 0..n {
77 m[i] = beta1 * m[i] + (1.0 - beta1) * g[i];
78 v[i] = beta2 * v[i] + (1.0 - beta2) * g[i] * g[i];
79 let m_hat = m[i] / (1.0 - b1t);
80 let v_hat = v[i] / (1.0 - b2t);
81 x[i] -= lr * m_hat / (v_hat.sqrt() + eps);
82 }
83 }
84 let f_val = f(&x);
85 OptResult {
86 x,
87 f_val,
88 n_iter,
89 converged,
90 }
91}
92pub(super) fn dot(a: &[f64], b: &[f64]) -> f64 {
93 a.iter().zip(b.iter()).map(|(ai, bi)| ai * bi).sum()
94}
95pub(super) fn axpy(alpha: f64, x: &[f64], y: &mut [f64]) {
96 for (yi, xi) in y.iter_mut().zip(x.iter()) {
97 *yi += alpha * xi;
98 }
99}
100pub fn lbfgs(
104 f: impl Fn(&[f64]) -> f64,
105 grad: impl Fn(&[f64]) -> Vec<f64>,
106 x0: Vec<f64>,
107 m: usize,
108 tol: f64,
109 max_iter: u32,
110) -> OptResult {
111 let _n = x0.len();
112 let mut x = x0;
113 let mut g = grad(&x);
114 let mut s_list: Vec<Vec<f64>> = Vec::with_capacity(m);
115 let mut y_list: Vec<Vec<f64>> = Vec::with_capacity(m);
116 let mut converged = false;
117 let mut n_iter = 0u32;
118 for _ in 0..max_iter {
119 n_iter += 1;
120 let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
121 if gnorm < tol {
122 converged = true;
123 break;
124 }
125 let k = s_list.len();
126 let mut q = g.clone();
127 let mut alphas = vec![0.0f64; k];
128 for i in (0..k).rev() {
129 let rho_i = 1.0 / dot(&y_list[i], &s_list[i]);
130 let a = rho_i * dot(&s_list[i], &q);
131 alphas[i] = a;
132 axpy(-a, &y_list[i].clone(), &mut q);
133 }
134 let mut r = if k > 0 {
135 let scale = dot(&s_list[k - 1], &y_list[k - 1])
136 / dot(&y_list[k - 1], &y_list[k - 1]).max(1e-15);
137 q.iter().map(|qi| scale * qi).collect::<Vec<_>>()
138 } else {
139 q.clone()
140 };
141 for i in 0..k {
142 let rho_i = 1.0 / dot(&y_list[i], &s_list[i]);
143 let beta = rho_i * dot(&y_list[i], &r);
144 axpy(alphas[i] - beta, &s_list[i].clone(), &mut r);
145 }
146 let direction: Vec<f64> = r.iter().map(|ri| -*ri).collect();
147 let f0 = f(&x);
148 let gd = dot(&g, &direction);
149 let alpha = backtracking_line_search(&f, &x, &direction, f0, gd, 1.0, 0.5, 1e-4);
150 let s: Vec<f64> = direction.iter().map(|di| alpha * di).collect();
151 let x_new: Vec<f64> = x.iter().zip(s.iter()).map(|(xi, si)| xi + si).collect();
152 let g_new = grad(&x_new);
153 let y: Vec<f64> = g_new
154 .iter()
155 .zip(g.iter())
156 .map(|(gni, gi)| gni - gi)
157 .collect();
158 let sy = dot(&s, &y);
159 if sy > 1e-15 {
160 if s_list.len() == m {
161 s_list.remove(0);
162 y_list.remove(0);
163 }
164 s_list.push(s);
165 y_list.push(y);
166 }
167 x = x_new;
168 g = g_new;
169 }
170 let f_val = f(&x);
171 OptResult {
172 x,
173 f_val,
174 n_iter,
175 converged,
176 }
177}
/// Nelder–Mead downhill simplex minimization (derivative-free).
///
/// Builds an initial simplex from `x0` by offsetting each coordinate by
/// `step`, then iteratively applies reflection / expansion / contraction /
/// shrink moves. Converges when the standard deviation of the simplex's
/// function values falls below `tol`.
pub fn nelder_mead(
    f: impl Fn(&[f64]) -> f64,
    x0: Vec<f64>,
    step: f64,
    tol: f64,
    max_iter: u32,
) -> OptResult {
    let n = x0.len();
    // Initial simplex: x0 plus one vertex per coordinate direction.
    let mut simplex: Vec<Vec<f64>> = Vec::with_capacity(n + 1);
    simplex.push(x0.clone());
    for i in 0..n {
        let mut v = x0.clone();
        v[i] += step;
        simplex.push(v);
    }
    let mut fvals: Vec<f64> = simplex.iter().map(|v| f(v)).collect();
    let mut n_iter = 0u32;
    let mut converged = false;
    // Standard coefficients: reflection, expansion, contraction, shrink.
    let alpha = 1.0_f64;
    let gamma = 2.0_f64;
    let rho = 0.5_f64;
    let sigma = 0.5_f64;
    for _ in 0..max_iter {
        n_iter += 1;
        // Sort vertices by objective value (best first, worst last).
        let mut order: Vec<usize> = (0..=n).collect();
        order.sort_by(|&a, &b| {
            fvals[a]
                .partial_cmp(&fvals[b])
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        simplex = order.iter().map(|&i| simplex[i].clone()).collect();
        fvals = order.iter().map(|&i| fvals[i]).collect();
        // Convergence test: spread (population std dev) of the vertex values.
        let fmean = fvals.iter().sum::<f64>() / (n + 1) as f64;
        let fstd: f64 =
            (fvals.iter().map(|fi| (fi - fmean).powi(2)).sum::<f64>() / (n + 1) as f64).sqrt();
        if fstd < tol {
            converged = true;
            break;
        }
        // Centroid of every vertex except the worst one.
        let mut centroid = vec![0.0f64; n];
        for v in &simplex[..n] {
            for (c, vi) in centroid.iter_mut().zip(v.iter()) {
                *c += vi;
            }
        }
        for c in centroid.iter_mut() {
            *c /= n as f64;
        }
        // Reflect the worst vertex through the centroid.
        let xr: Vec<f64> = centroid
            .iter()
            .zip(simplex[n].iter())
            .map(|(c, w)| c + alpha * (c - w))
            .collect();
        let fr = f(&xr);
        if fr < fvals[0] {
            // Reflected point beats the current best: try expanding further.
            let xe: Vec<f64> = centroid
                .iter()
                .zip(xr.iter())
                .map(|(c, r)| c + gamma * (r - c))
                .collect();
            let fe = f(&xe);
            if fe < fr {
                simplex[n] = xe;
                fvals[n] = fe;
            } else {
                simplex[n] = xr;
                fvals[n] = fr;
            }
        } else if fr < fvals[n - 1] {
            // Better than the second-worst: accept the reflection as-is.
            simplex[n] = xr;
            fvals[n] = fr;
        } else {
            // Contract the worst vertex toward the centroid.
            let xc: Vec<f64> = centroid
                .iter()
                .zip(simplex[n].iter())
                .map(|(c, w)| c + rho * (w - c))
                .collect();
            let fc = f(&xc);
            if fc < fvals[n] {
                simplex[n] = xc;
                fvals[n] = fc;
            } else {
                // Contraction failed: shrink every vertex toward the best.
                let x0b = simplex[0].clone();
                for i in 1..=n {
                    simplex[i] = x0b
                        .iter()
                        .zip(simplex[i].iter())
                        .map(|(b, v)| b + sigma * (v - b))
                        .collect();
                    fvals[i] = f(&simplex[i]);
                }
            }
        }
    }
    // Index 0 held the best vertex at the start of the final iteration; the
    // last move only ever replaced the worst vertex (or shrank toward 0).
    let f_val = fvals[0];
    OptResult {
        x: simplex[0].clone(),
        f_val,
        n_iter,
        converged,
    }
}
/// Golden-section search for a minimum of a unimodal 1-D function on [a, b].
///
/// Shrinks the bracket by the golden ratio each step and returns
/// `(x_min, f(x_min))` once the bracket is narrower than `tol`.
pub fn golden_section(f: impl Fn(f64) -> f64, a: f64, b: f64, tol: f64) -> (f64, f64) {
    let phi = (5.0_f64.sqrt() - 1.0) / 2.0;
    let (mut lo, mut hi) = (a, b);
    let mut c = hi - phi * (hi - lo);
    let mut d = lo + phi * (hi - lo);
    let (mut fc, mut fd) = (f(c), f(d));
    while (hi - lo).abs() > tol {
        if fc < fd {
            // Minimum lies in [lo, d]: drop the right tail.
            hi = d;
            d = c;
            fd = fc;
            c = hi - phi * (hi - lo);
            fc = f(c);
        } else {
            // Minimum lies in [c, hi]: drop the left tail.
            lo = c;
            c = d;
            fc = fd;
            d = lo + phi * (hi - lo);
            fd = f(d);
        }
    }
    let x_min = (lo + hi) / 2.0;
    (x_min, f(x_min))
}
/// Brent's method for 1-D minimization, where `b` is the initial guess and
/// `a` / `c` bound the search interval.
///
/// Alternates between parabolic interpolation through the three best points
/// and golden-section steps when the parabola is not trustworthy. Returns
/// `(x_min, f(x_min))`.
pub fn brent(f: impl Fn(f64) -> f64, a: f64, b: f64, c: f64, tol: f64) -> (f64, f64) {
    // Golden-section step fraction (2 - phi).
    let gold = 0.381_966_011_250_105;
    let tiny = 1e-10;
    // x: best point so far; w: second best; v: previous value of w.
    let mut x = b;
    let mut w = b;
    let mut v = b;
    let mut fx = f(x);
    let mut fw = fx;
    let mut fv = fx;
    let mut lo = a.min(c);
    let mut hi = a.max(c);
    // d: most recent step; e: the step taken two iterations ago.
    let mut d = 0.0f64;
    let mut e = 0.0f64;
    for _ in 0..500 {
        let xm = 0.5 * (lo + hi);
        let tol1 = tol * x.abs() + tiny;
        let tol2 = 2.0 * tol1;
        // Done when the remaining bracket around x is small enough.
        if (x - xm).abs() <= tol2 - 0.5 * (hi - lo) {
            return (x, fx);
        }
        let mut use_gold = true;
        if e.abs() > tol1 {
            // Try a parabolic fit through (x, fx), (w, fw), (v, fv).
            let r = (x - w) * (fx - fv);
            let q_val = (x - v) * (fx - fw);
            let p = (x - v) * q_val - (x - w) * r;
            let mut q2 = 2.0 * (q_val - r);
            let p = if q2 > 0.0 { -p } else { p };
            q2 = q2.abs();
            // Accept the parabolic step only if it is smaller than half the
            // step from two iterations ago and lands strictly inside (lo, hi).
            if p.abs() < (0.5 * q2 * e).abs() && p > q2 * (lo - x) && p < q2 * (hi - x) {
                e = d;
                d = p / q2;
                use_gold = false;
                let u = x + d;
                // Too close to a bound: nudge toward the midpoint instead.
                if (u - lo) < tol2 || (hi - u) < tol2 {
                    d = if xm >= x { tol1 } else { -tol1 };
                }
            }
        }
        if use_gold {
            // Golden-section step into the larger half of the bracket.
            e = if x >= xm { lo - x } else { hi - x };
            d = gold * e;
        }
        // Never evaluate closer than tol1 to the current best point.
        let u = x + if d.abs() >= tol1 {
            d
        } else if d >= 0.0 {
            tol1
        } else {
            -tol1
        };
        let fu = f(u);
        if fu <= fx {
            // u is the new best point: shrink the bracket around it and
            // shift the (x, w, v) history down.
            if u < x {
                hi = x;
            } else {
                lo = x;
            }
            v = w;
            fv = fw;
            w = x;
            fw = fx;
            x = u;
            fx = fu;
        } else {
            // u is worse, but still tightens the bracket on one side.
            if u < x {
                lo = u;
            } else {
                hi = u;
            }
            if fu <= fw || (w - x).abs() < tiny {
                v = w;
                fv = fw;
                w = u;
                fw = fu;
            } else if fu <= fv || (v - x).abs() < tiny || (v - w).abs() < tiny {
                v = u;
                fv = fu;
            }
        }
    }
    // Iteration cap reached; return the best point found so far.
    (x, fx)
}
/// Bisection root finder on [a, b].
///
/// Returns `None` when f(a) and f(b) do not bracket a sign change; otherwise
/// halves the interval (up to 1000 times) until it is narrower than `tol`
/// and returns the midpoint.
pub fn bisection(f: impl Fn(f64) -> f64, a: f64, b: f64, tol: f64) -> Option<f64> {
    let (mut lo, mut hi) = (a, b);
    let flo = f(lo);
    let fhi = f(hi);
    if flo * fhi > 0.0 {
        // Same sign at both ends: no bracketed root.
        return None;
    }
    let mut f_lo = flo;
    for _ in 0..1000 {
        if (hi - lo).abs() < tol {
            break;
        }
        let mid = 0.5 * (lo + hi);
        let fmid = f(mid);
        if fmid * f_lo <= 0.0 {
            // Sign change in the left half: keep [lo, mid].
            hi = mid;
        } else {
            // Sign change in the right half: keep [mid, hi].
            lo = mid;
            f_lo = fmid;
        }
    }
    Some((lo + hi) / 2.0)
}
/// Central-difference approximation of the gradient of `f` at `x`, step `h`.
///
/// Reuses two scratch vectors across coordinates: each entry is perturbed,
/// differenced, and then restored to `x[i]`.
pub fn numerical_gradient(f: impl Fn(&[f64]) -> f64, x: &[f64], h: f64) -> Vec<f64> {
    let mut plus = x.to_vec();
    let mut minus = x.to_vec();
    (0..x.len())
        .map(|i| {
            plus[i] += h;
            minus[i] -= h;
            let gi = (f(&plus) - f(&minus)) / (2.0 * h);
            // Restore the scratch vectors for the next coordinate.
            plus[i] = x[i];
            minus[i] = x[i];
            gi
        })
        .collect()
}
/// Central-difference approximation of the Hessian of `f` at `x`, step `h`.
///
/// Only the lower triangle is evaluated; mixed partials use the four-point
/// stencil and are mirrored to keep the result symmetric.
pub fn numerical_hessian(f: impl Fn(&[f64]) -> f64, x: &[f64], h: f64) -> Vec<Vec<f64>> {
    let n = x.len();
    let f0 = f(x);
    let mut hess = vec![vec![0.0f64; n]; n];
    for i in 0..n {
        // Off-diagonal entries: d2f/dxi dxj via the four-point stencil.
        for j in 0..i {
            let mut xpp = x.to_vec();
            let mut xpm = x.to_vec();
            let mut xmp = x.to_vec();
            let mut xmm = x.to_vec();
            xpp[i] += h;
            xpp[j] += h;
            xpm[i] += h;
            xpm[j] -= h;
            xmp[i] -= h;
            xmp[j] += h;
            xmm[i] -= h;
            xmm[j] -= h;
            let mixed = (f(&xpp) - f(&xpm) - f(&xmp) + f(&xmm)) / (4.0 * h * h);
            hess[i][j] = mixed;
            hess[j][i] = mixed;
        }
        // Diagonal entry: standard second central difference.
        let mut xp = x.to_vec();
        let mut xm = x.to_vec();
        xp[i] += h;
        xm[i] -= h;
        hess[i][i] = (f(&xp) - 2.0 * f0 + f(&xm)) / (h * h);
    }
    hess
}
/// Backtracking (Armijo) line search.
///
/// Starting from `alpha0`, multiplies the step by `rho` until the
/// sufficient-decrease condition `f(x + alpha d) <= f0 + c * alpha * grad_dot`
/// holds, giving up after 100 trials. Returns the last `alpha` tried.
#[allow(clippy::too_many_arguments)]
pub fn backtracking_line_search(
    f: impl Fn(&[f64]) -> f64,
    x: &[f64],
    direction: &[f64],
    f0: f64,
    grad_dot: f64,
    alpha0: f64,
    rho: f64,
    c: f64,
) -> f64 {
    let mut alpha = alpha0;
    for _ in 0..100 {
        let trial: Vec<f64> = x
            .iter()
            .zip(direction.iter())
            .map(|(xi, di)| xi + alpha * di)
            .collect();
        let sufficient_decrease = f(&trial) <= f0 + c * alpha * grad_dot;
        if sufficient_decrease {
            break;
        }
        alpha *= rho;
    }
    alpha
}
502pub fn constrained_min_box(
506 f: impl Fn(&[f64]) -> f64,
507 grad: impl Fn(&[f64]) -> Vec<f64>,
508 x0: Vec<f64>,
509 lb: &[f64],
510 ub: &[f64],
511 tol: f64,
512 max_iter: u32,
513) -> OptResult {
514 let n = x0.len();
515 let mut x = x0;
516 for i in 0..n {
517 x[i] = x[i].clamp(lb[i], ub[i]);
518 }
519 let mut converged = false;
520 let mut n_iter = 0u32;
521 let mut lr = 1.0f64;
522 for _ in 0..max_iter {
523 n_iter += 1;
524 let g = grad(&x);
525 let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
526 if gnorm < tol {
527 converged = true;
528 break;
529 }
530 let f0 = f(&x);
531 let grad_dot = -g.iter().map(|gi| gi * gi).sum::<f64>();
532 let dir: Vec<f64> = g.iter().map(|gi| -*gi).collect();
533 let mut accepted = false;
534 for _ in 0..30 {
535 let mut x_try: Vec<f64> = x
536 .iter()
537 .zip(dir.iter())
538 .map(|(xi, di)| xi + lr * di)
539 .collect();
540 for i in 0..n {
541 x_try[i] = x_try[i].clamp(lb[i], ub[i]);
542 }
543 if f(&x_try) <= f0 + 1e-4 * lr * grad_dot {
544 x = x_try;
545 lr *= 1.2;
546 lr = lr.min(1.0);
547 accepted = true;
548 break;
549 }
550 lr *= 0.5;
551 }
552 if !accepted {
553 break;
554 }
555 }
556 let f_val = f(&x);
557 OptResult {
558 x,
559 f_val,
560 n_iter,
561 converged,
562 }
563}
564pub fn gradient_descent_momentum(
568 f: impl Fn(&[f64]) -> f64,
569 grad: impl Fn(&[f64]) -> Vec<f64>,
570 x0: Vec<f64>,
571 lr: f64,
572 momentum: f64,
573 tol: f64,
574 max_iter: u32,
575) -> OptResult {
576 let n = x0.len();
577 let mut x = x0;
578 let mut vel = vec![0.0f64; n];
579 let mut converged = false;
580 let mut n_iter = 0u32;
581 for _ in 0..max_iter {
582 n_iter += 1;
583 let g = grad(&x);
584 let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
585 if gnorm < tol {
586 converged = true;
587 break;
588 }
589 for i in 0..n {
590 vel[i] = momentum * vel[i] - lr * g[i];
591 x[i] += vel[i];
592 }
593 }
594 let f_val = f(&x);
595 OptResult {
596 x,
597 f_val,
598 n_iter,
599 converged,
600 }
601}
602pub fn simulated_annealing(
609 f: impl Fn(&[f64]) -> f64,
610 x0: Vec<f64>,
611 initial_temp: f64,
612 cooling_rate: f64,
613 step_size: f64,
614 max_iter: u32,
615) -> OptResult {
616 use rand::RngExt;
617 let mut x = x0.clone();
618 let mut f_cur = f(&x);
619 let mut best_x = x.clone();
620 let mut best_f = f_cur;
621 let mut temp = initial_temp;
622 let mut rng = rand::rng();
623 for _ in 0..max_iter {
624 let x_new: Vec<f64> = x
625 .iter()
626 .map(|xi| xi + (rng.random::<f64>() - 0.5) * 2.0 * step_size)
627 .collect();
628 let f_new = f(&x_new);
629 let delta = f_new - f_cur;
630 if delta < 0.0 || (temp > 1e-300 && rng.random::<f64>() < (-delta / temp).exp()) {
631 x = x_new;
632 f_cur = f_new;
633 if f_cur < best_f {
634 best_f = f_cur;
635 best_x = x.clone();
636 }
637 }
638 temp *= cooling_rate;
639 }
640 OptResult {
641 x: best_x,
642 f_val: best_f,
643 n_iter: max_iter,
644 converged: temp < 1e-10,
645 }
646}
647#[allow(clippy::too_many_arguments)]
656pub fn particle_swarm(
657 f: impl Fn(&[f64]) -> f64,
658 lb: &[f64],
659 ub: &[f64],
660 n_particles: usize,
661 w: f64,
662 c1: f64,
663 c2: f64,
664 max_iter: u32,
665) -> OptResult {
666 let n = lb.len();
667 let mut rng = rand::rng();
668 let mut particles: Vec<Particle> = (0..n_particles)
669 .map(|_| {
670 let pos: Vec<f64> = (0..n)
671 .map(|i| lb[i] + rng.random::<f64>() * (ub[i] - lb[i]))
672 .collect();
673 let vel: Vec<f64> = (0..n)
674 .map(|i| (rng.random::<f64>() - 0.5) * (ub[i] - lb[i]) * 0.1)
675 .collect();
676 let val = f(&pos);
677 Particle {
678 best_pos: pos.clone(),
679 best_val: val,
680 pos,
681 vel,
682 }
683 })
684 .collect();
685 let init_best = particles
686 .iter()
687 .min_by(|a, b| {
688 a.best_val
689 .partial_cmp(&b.best_val)
690 .unwrap_or(std::cmp::Ordering::Equal)
691 })
692 .expect("particles is non-empty");
693 let mut global_best_pos = init_best.best_pos.clone();
694 let mut global_best_val = init_best.best_val;
695 for _ in 0..max_iter {
696 for p in particles.iter_mut() {
697 for i in 0..n {
698 let r1: f64 = rng.random();
699 let r2: f64 = rng.random();
700 p.vel[i] = w * p.vel[i]
701 + c1 * r1 * (p.best_pos[i] - p.pos[i])
702 + c2 * r2 * (global_best_pos[i] - p.pos[i]);
703 p.pos[i] = (p.pos[i] + p.vel[i]).clamp(lb[i], ub[i]);
704 }
705 let val = f(&p.pos);
706 if val < p.best_val {
707 p.best_val = val;
708 p.best_pos = p.pos.clone();
709 if val < global_best_val {
710 global_best_val = val;
711 global_best_pos = p.pos.clone();
712 }
713 }
714 }
715 }
716 OptResult {
717 x: global_best_pos,
718 f_val: global_best_val,
719 n_iter: max_iter,
720 converged: true,
721 }
722}
/// Real-coded genetic algorithm over the box `[lb, ub]`.
///
/// Each generation builds a full replacement population via binary-tournament
/// selection, arithmetic (blend) crossover with probability `crossover_rate`,
/// and per-gene uniform mutation scaled by `mutation_scale`. Returns the best
/// individual of the final generation — there is no elitism, so the best-ever
/// individual can be lost between generations.
#[allow(clippy::too_many_arguments)]
pub fn genetic_algorithm(
    f: impl Fn(&[f64]) -> f64,
    lb: &[f64],
    ub: &[f64],
    pop_size: usize,
    crossover_rate: f64,
    mutation_rate: f64,
    mutation_scale: f64,
    max_generations: u32,
) -> OptResult {
    let n = lb.len();
    let mut rng = rand::rng();
    // Random initial population, uniform in the box.
    let mut pop: Vec<Vec<f64>> = (0..pop_size)
        .map(|_| {
            (0..n)
                .map(|i| lb[i] + rng.random::<f64>() * (ub[i] - lb[i]))
                .collect()
        })
        .collect();
    let mut fitness: Vec<f64> = pop.iter().map(|x| f(x)).collect();
    for _ in 0..max_generations {
        let mut new_pop: Vec<Vec<f64>> = Vec::with_capacity(pop_size);
        for _ in 0..pop_size {
            // Binary tournament: draw two random indices, keep the fitter.
            // (`% pop_size` is a belt-and-braces guard; random::<f64>() is in [0, 1).)
            let a_idx = {
                let i = (rng.random::<f64>() * pop_size as f64) as usize % pop_size;
                let j = (rng.random::<f64>() * pop_size as f64) as usize % pop_size;
                if fitness[i] < fitness[j] { i } else { j }
            };
            let b_idx = {
                let i = (rng.random::<f64>() * pop_size as f64) as usize % pop_size;
                let j = (rng.random::<f64>() * pop_size as f64) as usize % pop_size;
                if fitness[i] < fitness[j] { i } else { j }
            };
            let mut child = pop[a_idx].clone();
            // Arithmetic (blend) crossover between the two tournament winners.
            if rng.random::<f64>() < crossover_rate {
                let alpha: f64 = rng.random();
                child = child
                    .iter()
                    .zip(pop[b_idx].iter())
                    .map(|(&a, &b)| alpha * a + (1.0 - alpha) * b)
                    .collect();
            }
            // Per-gene uniform mutation, clamped back into the box.
            for i in 0..n {
                if rng.random::<f64>() < mutation_rate {
                    let delta = (rng.random::<f64>() - 0.5) * 2.0 * mutation_scale;
                    child[i] = (child[i] + delta).clamp(lb[i], ub[i]);
                }
            }
            new_pop.push(child);
        }
        pop = new_pop;
        fitness = pop.iter().map(|x| f(x)).collect();
    }
    // Best individual of the final generation.
    let best_idx = fitness
        .iter()
        .enumerate()
        .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
        .map(|(i, _)| i)
        .unwrap_or(0);
    OptResult {
        x: pop[best_idx].clone(),
        f_val: fitness[best_idx],
        n_iter: max_generations,
        converged: true,
    }
}
794#[allow(clippy::too_many_arguments)]
799pub fn lbfgsb(
800 f: impl Fn(&[f64]) -> f64,
801 grad: impl Fn(&[f64]) -> Vec<f64>,
802 x0: Vec<f64>,
803 lb: &[f64],
804 ub: &[f64],
805 m: usize,
806 tol: f64,
807 max_iter: u32,
808) -> OptResult {
809 let mut x: Vec<f64> = x0
810 .iter()
811 .enumerate()
812 .map(|(i, &xi)| xi.clamp(lb[i], ub[i]))
813 .collect();
814 let mut g = grad(&x);
815 let mut s_list: Vec<Vec<f64>> = Vec::with_capacity(m);
816 let mut y_list: Vec<Vec<f64>> = Vec::with_capacity(m);
817 let mut converged = false;
818 let mut n_iter = 0u32;
819 for _ in 0..max_iter {
820 n_iter += 1;
821 let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
822 if gnorm < tol {
823 converged = true;
824 break;
825 }
826 let k = s_list.len();
827 let mut q = g.clone();
828 let mut alphas = vec![0.0f64; k];
829 for i in (0..k).rev() {
830 let sy = dot(&y_list[i], &s_list[i]);
831 if sy.abs() < 1e-15 {
832 continue;
833 }
834 let rho_i = 1.0 / sy;
835 let a = rho_i * dot(&s_list[i], &q);
836 alphas[i] = a;
837 axpy(-a, &y_list[i].clone(), &mut q);
838 }
839 let mut r = if k > 0 {
840 let scale = dot(&s_list[k - 1], &y_list[k - 1])
841 / dot(&y_list[k - 1], &y_list[k - 1]).max(1e-15);
842 q.iter().map(|qi| scale * qi).collect::<Vec<_>>()
843 } else {
844 q.clone()
845 };
846 for i in 0..k {
847 let sy = dot(&y_list[i], &s_list[i]);
848 if sy.abs() < 1e-15 {
849 continue;
850 }
851 let rho_i = 1.0 / sy;
852 let beta = rho_i * dot(&y_list[i], &r);
853 axpy(alphas[i] - beta, &s_list[i].clone(), &mut r);
854 }
855 let direction: Vec<f64> = r.iter().map(|ri| -*ri).collect();
856 let proj_dir: Vec<f64> = direction
857 .iter()
858 .enumerate()
859 .map(|(i, &di)| {
860 if (x[i] <= lb[i] && di < 0.0) || (x[i] >= ub[i] && di > 0.0) {
861 0.0
862 } else {
863 di
864 }
865 })
866 .collect();
867 let f0 = f(&x);
868 let gd = dot(&g, &proj_dir);
869 let alpha = backtracking_line_search(&f, &x, &proj_dir, f0, gd, 1.0, 0.5, 1e-4);
870 let s: Vec<f64> = proj_dir.iter().map(|di| alpha * di).collect();
871 let x_new: Vec<f64> = x
872 .iter()
873 .zip(s.iter())
874 .enumerate()
875 .map(|(i, (xi, si))| (xi + si).clamp(lb[i], ub[i]))
876 .collect();
877 let g_new = grad(&x_new);
878 let y: Vec<f64> = g_new
879 .iter()
880 .zip(g.iter())
881 .map(|(gni, gi)| gni - gi)
882 .collect();
883 let sy = dot(&s, &y);
884 if sy > 1e-15 {
885 if s_list.len() == m {
886 s_list.remove(0);
887 y_list.remove(0);
888 }
889 s_list.push(s);
890 y_list.push(y);
891 }
892 x = x_new;
893 g = g_new;
894 }
895 let f_val = f(&x);
896 OptResult {
897 x,
898 f_val,
899 n_iter,
900 converged,
901 }
902}
903pub fn conjugate_gradient_minimize<F, G>(
908 f: F,
909 grad: G,
910 x0: Vec<f64>,
911 max_iter: usize,
912 tol: f64,
913) -> Vec<f64>
914where
915 F: Fn(&[f64]) -> f64,
916 G: Fn(&[f64]) -> Vec<f64>,
917{
918 let n = x0.len();
919 let mut x = x0;
920 let mut g = grad(&x);
921 let mut d: Vec<f64> = g.iter().map(|gi| -*gi).collect();
922 for iter in 0..max_iter {
923 let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
924 if gnorm < tol {
925 break;
926 }
927 let f0 = f(&x);
928 let gd = g.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum::<f64>();
929 if gd >= 0.0 {
930 d = g.iter().map(|gi| -*gi).collect();
931 }
932 let gd = g.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum::<f64>();
933 let alpha = backtracking_line_search(&f, &x, &d, f0, gd, 1.0, 0.5, 1e-4);
934 let x_new: Vec<f64> = (0..n).map(|i| x[i] + alpha * d[i]).collect();
935 let g_new = grad(&x_new);
936 let g_new_sq: f64 = g_new.iter().map(|v| v * v).sum();
937 let g_sq: f64 = g.iter().map(|v| v * v).sum();
938 let beta = if g_sq < 1e-30 || (iter + 1) % n == 0 {
939 0.0
940 } else {
941 g_new_sq / g_sq
942 };
943 d = (0..n).map(|i| -g_new[i] + beta * d[i]).collect();
944 x = x_new;
945 g = g_new;
946 }
947 x
948}
/// Bisection-style line search enforcing the strong Wolfe conditions.
///
/// `f0` and `g0_dot_d` are the objective value and directional derivative at
/// `x`; `c1` controls sufficient decrease, `c2` the curvature condition.
/// Returns the last trial step if no Wolfe point is found within `max_iter`.
#[allow(clippy::too_many_arguments)]
pub fn wolfe_line_search(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x: &[f64],
    direction: &[f64],
    f0: f64,
    g0_dot_d: f64,
    c1: f64,
    c2: f64,
    max_iter: usize,
) -> f64 {
    let mut alpha = 1.0f64;
    let mut lo = 0.0f64;
    let mut hi = f64::INFINITY;
    for _ in 0..max_iter {
        let trial: Vec<f64> = (0..x.len()).map(|i| x[i] + alpha * direction[i]).collect();
        let f_new = f(&trial);
        // Armijo (sufficient decrease) failed: step too long, bisect down.
        if f_new > f0 + c1 * alpha * g0_dot_d {
            hi = alpha;
            alpha = 0.5 * (lo + hi);
            continue;
        }
        let g_new = grad(&trial);
        let slope: f64 = g_new.iter().zip(direction.iter()).map(|(gi, di)| gi * di).sum();
        // Strong Wolfe curvature condition satisfied.
        if slope.abs() <= c2 * g0_dot_d.abs() {
            return alpha;
        }
        if slope > 0.0 {
            // Overshot the 1-D minimum: shrink the bracket from above.
            hi = alpha;
            alpha = 0.5 * (lo + hi);
        } else {
            // Still descending: raise the lower bound; extend if unbracketed.
            lo = alpha;
            alpha = if hi.is_infinite() { alpha * 2.0 } else { 0.5 * (lo + hi) };
        }
    }
    alpha
}
/// Augmented Lagrangian method for equality-constrained minimization
/// (constraints treated as `c_i(x) = 0`).
///
/// Inner loop: crude gradient descent with step-halving on the augmented
/// objective `f(x) + sum_i [ lam_i * c_i(x) + (rho/2) * c_i(x)^2 ]`.
/// Outer loop: multiplier update `lam_i += rho * c_i(x)` followed by penalty
/// doubling, until the maximum constraint violation drops below `tol`.
#[allow(clippy::too_many_arguments)]
pub fn augmented_lagrangian<F, G, C, DC>(
    f: F,
    grad_f: G,
    constraints: &[C],
    grad_constraints: &[DC],
    x0: Vec<f64>,
    rho0: f64,
    outer_iter: usize,
    inner_iter: usize,
    tol: f64,
) -> OptResult
where
    F: Fn(&[f64]) -> f64,
    G: Fn(&[f64]) -> Vec<f64>,
    C: Fn(&[f64]) -> f64,
    DC: Fn(&[f64]) -> Vec<f64>,
{
    let n = x0.len();
    let m = constraints.len();
    let mut x = x0;
    // Lagrange multiplier estimates, one per constraint.
    let mut lam = vec![0.0f64; m];
    let mut rho = rho0;
    let mut converged = false;
    let mut total_iter = 0u32;
    for _outer in 0..outer_iter {
        for _inner in 0..inner_iter {
            total_iter += 1;
            // Gradient of the augmented Lagrangian:
            // grad f + sum_i (lam_i + rho * c_i(x)) * grad c_i(x).
            let mut g = grad_f(&x);
            for (i, (ci, dci)) in constraints.iter().zip(grad_constraints.iter()).enumerate() {
                let cv = ci(&x);
                let dv = dci(&x);
                let coeff = lam[i] + rho * cv;
                for j in 0..n {
                    g[j] += coeff * dv[j];
                }
            }
            let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
            // Inner stationarity reached (tighter than the outer tolerance).
            if gnorm < tol * 0.1 {
                break;
            }
            // Augmented objective used for the backtracking acceptance test.
            let mut lr = 0.1;
            let aug_f = |xp: &[f64]| {
                let mut val = f(xp);
                for (i, ci) in constraints.iter().enumerate() {
                    let cv = ci(xp);
                    val += lam[i] * cv + 0.5 * rho * cv * cv;
                }
                val
            };
            let f0 = aug_f(&x);
            // Halve the step until the augmented value decreases (max 20 tries).
            for _ in 0..20 {
                let x_try: Vec<f64> = (0..n).map(|j| x[j] - lr * g[j]).collect();
                if aug_f(&x_try) < f0 {
                    x = x_try;
                    break;
                }
                lr *= 0.5;
            }
        }
        // Multiplier update and measurement of the worst violation.
        let mut max_viol = 0.0f64;
        for (i, ci) in constraints.iter().enumerate() {
            let cv = ci(&x);
            lam[i] += rho * cv;
            max_viol = max_viol.max(cv.abs());
        }
        if max_viol < tol {
            converged = true;
            break;
        }
        // Tighten the penalty for the next outer round.
        rho *= 2.0;
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter: total_iter,
        converged,
    }
}
1089pub fn coordinate_descent(
1094 f: impl Fn(&[f64]) -> f64,
1095 x0: Vec<f64>,
1096 step: f64,
1097 tol: f64,
1098 max_iter: usize,
1099) -> OptResult {
1100 let n = x0.len();
1101 let mut x = x0;
1102 let mut converged = false;
1103 let mut n_iter = 0u32;
1104 for _iter in 0..max_iter {
1105 n_iter += 1;
1106 let x_old = x.clone();
1107 for d in 0..n {
1108 let (best_delta, _) = golden_section(
1109 |delta| {
1110 let mut xp = x.clone();
1111 xp[d] += delta;
1112 f(&xp)
1113 },
1114 -step,
1115 step,
1116 tol * 1e-2,
1117 );
1118 x[d] += best_delta;
1119 }
1120 let diff: f64 = x
1121 .iter()
1122 .zip(x_old.iter())
1123 .map(|(a, b)| (a - b).powi(2))
1124 .sum::<f64>()
1125 .sqrt();
1126 if diff < tol {
1127 converged = true;
1128 break;
1129 }
1130 }
1131 let f_val = f(&x);
1132 OptResult {
1133 x,
1134 f_val,
1135 n_iter,
1136 converged,
1137 }
1138}
/// Powell's direction-set minimization (derivative-free).
///
/// Starts from the coordinate axes as the direction set. Each sweep
/// line-minimizes along every direction (golden-section search on the step
/// parameter), then replaces the direction that produced the largest single
/// decrease with the normalized net displacement of the sweep.
///
/// NOTE(review): the line search brackets the step in [-10, 10]; minima
/// farther than 10 units along a direction are unreachable in one sweep —
/// confirm this range suits the callers' domains.
pub fn powell(f: impl Fn(&[f64]) -> f64, x0: Vec<f64>, tol: f64, max_iter: usize) -> OptResult {
    let n = x0.len();
    let mut x = x0;
    // Initial direction set: the coordinate axes (identity columns).
    let mut dirs: Vec<Vec<f64>> = (0..n)
        .map(|i| {
            let mut d = vec![0.0; n];
            d[i] = 1.0;
            d
        })
        .collect();
    let mut converged = false;
    let mut n_iter = 0u32;
    for _iter in 0..max_iter {
        n_iter += 1;
        let x_start = x.clone();
        let mut max_decrease = 0.0f64;
        let mut max_dir_idx = 0;
        for (k, dir) in dirs.iter().enumerate() {
            let f_before = f(&x);
            // 1-D minimization of f(x + a * dir) over a in [-10, 10].
            let (alpha, _) = golden_section(
                |a| {
                    let xp: Vec<f64> = x
                        .iter()
                        .zip(dir.iter())
                        .map(|(xi, di)| xi + a * di)
                        .collect();
                    f(&xp)
                },
                -10.0,
                10.0,
                tol * 0.01,
            );
            let xp: Vec<f64> = x
                .iter()
                .zip(dir.iter())
                .map(|(xi, di)| xi + alpha * di)
                .collect();
            // Track which direction produced the biggest improvement.
            let decrease = f_before - f(&xp);
            if decrease > max_decrease {
                max_decrease = decrease;
                max_dir_idx = k;
            }
            x = xp;
        }
        // Stop when a whole sweep barely moved the point.
        let diff: f64 = x
            .iter()
            .zip(x_start.iter())
            .map(|(a, b)| (a - b).powi(2))
            .sum::<f64>()
            .sqrt();
        if diff < tol {
            converged = true;
            break;
        }
        // Replace the most productive direction with the (normalized)
        // overall displacement of this sweep.
        let new_dir: Vec<f64> = x.iter().zip(x_start.iter()).map(|(a, b)| a - b).collect();
        let norm: f64 = new_dir.iter().map(|v| v * v).sum::<f64>().sqrt();
        if norm > 1e-15 {
            dirs[max_dir_idx] = new_dir.iter().map(|v| v / norm).collect();
        }
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
1211pub fn sgd_cosine_annealing(
1216 f: impl Fn(&[f64]) -> f64,
1217 stoch_grad: impl Fn(&[f64]) -> Vec<f64>,
1218 x0: Vec<f64>,
1219 lr_max: f64,
1220 lr_min: f64,
1221 max_iter: u32,
1222) -> OptResult {
1223 let n = x0.len();
1224 let mut x = x0;
1225 let mut n_iter = 0u32;
1226 for t in 0..max_iter {
1227 n_iter = t + 1;
1228 let cos_factor = 0.5 * (1.0 + (std::f64::consts::PI * t as f64 / max_iter as f64).cos());
1229 let lr = lr_min + (lr_max - lr_min) * cos_factor;
1230 let g = stoch_grad(&x);
1231 for i in 0..n {
1232 x[i] -= lr * g[i];
1233 }
1234 }
1235 let f_val = f(&x);
1236 let gnorm: f64 = stoch_grad(&x).iter().map(|v| v * v).sum::<f64>().sqrt();
1237 OptResult {
1238 x,
1239 f_val,
1240 n_iter,
1241 converged: gnorm < 1e-4,
1242 }
1243}
1244pub fn bfgs(
1250 f: impl Fn(&[f64]) -> f64,
1251 grad: impl Fn(&[f64]) -> Vec<f64>,
1252 x0: Vec<f64>,
1253 tol: f64,
1254 max_iter: usize,
1255) -> OptResult {
1256 let n = x0.len();
1257 let mut x = x0;
1258 let mut h: Vec<Vec<f64>> = (0..n)
1259 .map(|i| {
1260 let mut row = vec![0.0; n];
1261 row[i] = 1.0;
1262 row
1263 })
1264 .collect();
1265 let mut converged = false;
1266 let mut n_iter = 0u32;
1267 let mut g = grad(&x);
1268 for _ in 0..max_iter {
1269 n_iter += 1;
1270 let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
1271 if gnorm < tol {
1272 converged = true;
1273 break;
1274 }
1275 let p: Vec<f64> = (0..n)
1276 .map(|i| {
1277 -h[i]
1278 .iter()
1279 .zip(g.iter())
1280 .map(|(hij, gj)| hij * gj)
1281 .sum::<f64>()
1282 })
1283 .collect();
1284 let f0 = f(&x);
1285 let gd: f64 = g.iter().zip(p.iter()).map(|(gi, pi)| gi * pi).sum();
1286 let alpha = backtracking_line_search(&f, &x, &p, f0, gd, 1.0, 0.5, 1e-4);
1287 let x_new: Vec<f64> = (0..n).map(|i| x[i] + alpha * p[i]).collect();
1288 let g_new = grad(&x_new);
1289 let s: Vec<f64> = (0..n).map(|i| x_new[i] - x[i]).collect();
1290 let y: Vec<f64> = (0..n).map(|i| g_new[i] - g[i]).collect();
1291 let sy: f64 = s.iter().zip(y.iter()).map(|(si, yi)| si * yi).sum();
1292 if sy > 1e-15 {
1293 let hy: Vec<f64> = (0..n)
1294 .map(|i| {
1295 h[i].iter()
1296 .zip(y.iter())
1297 .map(|(hij, yj)| hij * yj)
1298 .sum::<f64>()
1299 })
1300 .collect();
1301 let ythy: f64 = y.iter().zip(hy.iter()).map(|(yi, hyi)| yi * hyi).sum();
1302 let rho_bfgs = 1.0 / sy;
1303 for i in 0..n {
1304 for j in 0..n {
1305 h[i][j] += rho_bfgs * s[i] * s[j] - (hy[i] * s[j] + s[i] * hy[j]) / ythy;
1306 }
1307 }
1308 }
1309 x = x_new;
1310 g = g_new;
1311 }
1312 let f_val = f(&x);
1313 OptResult {
1314 x,
1315 f_val,
1316 n_iter,
1317 converged,
1318 }
1319}
/// Scales `grad` in place so its Euclidean norm does not exceed `max_norm`.
/// Near-zero gradients are left untouched, avoiding a division by ~0.
pub fn clip_gradient(grad: &mut [f64], max_norm: f64) {
    let norm = grad.iter().fold(0.0f64, |acc, v| acc + v * v).sqrt();
    if norm > max_norm && norm > 1e-15 {
        let scale = max_norm / norm;
        grad.iter_mut().for_each(|g| *g *= scale);
    }
}
1332pub fn gradient_descent_clipped(
1334 f: impl Fn(&[f64]) -> f64,
1335 grad: impl Fn(&[f64]) -> Vec<f64>,
1336 x0: Vec<f64>,
1337 lr: f64,
1338 max_norm: f64,
1339 tol: f64,
1340 max_iter: u32,
1341) -> OptResult {
1342 let mut x = x0;
1343 let mut converged = false;
1344 let mut n_iter = 0u32;
1345 for _ in 0..max_iter {
1346 n_iter += 1;
1347 let mut g = grad(&x);
1348 clip_gradient(&mut g, max_norm);
1349 let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
1350 if gnorm < tol {
1351 converged = true;
1352 break;
1353 }
1354 for (xi, gi) in x.iter_mut().zip(g.iter()) {
1355 *xi -= lr * gi;
1356 }
1357 }
1358 let f_val = f(&x);
1359 OptResult {
1360 x,
1361 f_val,
1362 n_iter,
1363 converged,
1364 }
1365}
/// Cholesky factorization A = L L' of a symmetric matrix stored row-major in
/// a flat slice of length `n * n`; returns the lower-triangular factor, also
/// row-major. Non-positive diagonal pivots are replaced by 1e-8 rather than
/// panicking, and a (near-)zero divisor yields 0 for the off-diagonal entry.
pub fn cholesky_lower(a: &[f64], n: usize) -> Vec<f64> {
    let mut l = vec![0.0_f64; n * n];
    for i in 0..n {
        for j in 0..=i {
            // Partial sum over the already-computed columns 0..j.
            let partial: f64 = (0..j).map(|k| l[i * n + k] * l[j * n + k]).sum();
            l[i * n + j] = if i == j {
                let pivot = a[i * n + i] - partial;
                if pivot > 0.0 { pivot.sqrt() } else { 1e-8 }
            } else {
                let diag = l[j * n + j];
                if diag.abs() > f64::EPSILON {
                    (a[i * n + j] - partial) / diag
                } else {
                    0.0
                }
            };
        }
    }
    l
}
/// Solves L x = b by forward substitution, where L is lower-triangular and
/// stored row-major in a flat slice of length `n * n`. A (near-)zero diagonal
/// entry yields 0 for that component instead of dividing by it.
pub fn forward_solve_lower(l: &[f64], b: &[f64], n: usize) -> Vec<f64> {
    let mut x = vec![0.0_f64; n];
    for i in 0..n {
        // Accumulate the already-solved components weighted by row i of L.
        let partial: f64 = (0..i).map(|j| l[i * n + j] * x[j]).sum();
        let diag = l[i * n + i];
        x[i] = if diag.abs() > f64::EPSILON {
            (b[i] - partial) / diag
        } else {
            0.0
        };
    }
    x
}