
oxiphysics_core/optimization/functions.rs

//! Auto-generated module
//!
//! đŸ€– Generated with [SplitRS](https://github.com/cool-japan/splitrs)

use super::types::{OptResult, Particle};
use rand::Rng;

/// Gradient descent with fixed learning rate.
///
/// Minimises `f` starting from `x0` using the gradient `grad`.
/// Stops when the L2 norm of the gradient is below `tol`.
pub fn gradient_descent(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x0: Vec<f64>,
    lr: f64,
    tol: f64,
    max_iter: u32,
) -> OptResult {
    let mut x = x0;
    let mut converged = false;
    let mut n_iter = 0u32;
    for _ in 0..max_iter {
        n_iter += 1;
        let g = grad(&x);
        let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
        if gnorm < tol {
            converged = true;
            break;
        }
        for (xi, gi) in x.iter_mut().zip(g.iter()) {
            *xi -= lr * gi;
        }
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
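// Usage sketch (added for illustration; the hyper-parameter values below are
// assumptions, not module defaults): minimising the convex sphere function
// f(x) = ÎŁ x_iÂČ, whose gradient is 2x.
#[cfg(test)]
mod gradient_descent_example {
    use super::*;

    #[test]
    fn sphere_converges() {
        let res = gradient_descent(
            |x| x.iter().map(|v| v * v).sum(),
            |x| x.iter().map(|v| 2.0 * v).collect(),
            vec![3.0, -4.0],
            0.1,    // lr
            1e-8,   // tol
            10_000, // max_iter
        );
        assert!(res.converged);
        assert!(res.f_val < 1e-12);
    }
}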
/// Adam adaptive-moment optimizer.
///
/// Uses the standard Adam update rule with bias correction.
/// Stops when the L2 norm of the gradient falls below the default tolerance
/// `1e-7`, or after `max_iter` steps (no separate `tol` parameter because
/// Adam is typically used for a fixed budget).
#[allow(clippy::too_many_arguments)]
pub fn adam(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x0: Vec<f64>,
    lr: f64,
    beta1: f64,
    beta2: f64,
    eps: f64,
    max_iter: u32,
) -> OptResult {
    let n = x0.len();
    let mut x = x0;
    let mut m = vec![0.0f64; n];
    let mut v = vec![0.0f64; n];
    let mut converged = false;
    let mut n_iter = 0u32;
    for t in 1..=max_iter {
        n_iter = t;
        let g = grad(&x);
        let gnorm: f64 = g.iter().map(|gi| gi * gi).sum::<f64>().sqrt();
        if gnorm < 1e-7 {
            converged = true;
            break;
        }
        let b1t = beta1.powi(t as i32);
        let b2t = beta2.powi(t as i32);
        for i in 0..n {
            m[i] = beta1 * m[i] + (1.0 - beta1) * g[i];
            v[i] = beta2 * v[i] + (1.0 - beta2) * g[i] * g[i];
            let m_hat = m[i] / (1.0 - b1t);
            let v_hat = v[i] / (1.0 - b2t);
            x[i] -= lr * m_hat / (v_hat.sqrt() + eps);
        }
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
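// Usage sketch (illustrative hyper-parameters, not defaults): Adam on the
// sphere function. With a fixed learning rate Adam tends to oscillate near
// the optimum at the scale of `lr`, so the assertion below is deliberately
// loose rather than a convergence check.
#[cfg(test)]
mod adam_example {
    use super::*;

    #[test]
    fn sphere_improves() {
        let res = adam(
            |x| x.iter().map(|v| v * v).sum(),
            |x| x.iter().map(|v| 2.0 * v).collect(),
            vec![1.0, -2.0],
            0.01,  // lr
            0.9,   // beta1
            0.999, // beta2
            1e-8,  // eps
            5_000, // max_iter
        );
        assert!(res.f_val < 1.0); // started at f = 5.0
    }
}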
pub(super) fn dot(a: &[f64], b: &[f64]) -> f64 {
    a.iter().zip(b.iter()).map(|(ai, bi)| ai * bi).sum()
}
pub(super) fn axpy(alpha: f64, x: &[f64], y: &mut [f64]) {
    for (yi, xi) in y.iter_mut().zip(x.iter()) {
        *yi += alpha * xi;
    }
}
/// L-BFGS (Limited-memory Broyden–Fletcher–Goldfarb–Shanno).
///
/// Uses a history of `m` curvature pairs and a backtracking (Armijo)
/// line search.
pub fn lbfgs(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x0: Vec<f64>,
    m: usize,
    tol: f64,
    max_iter: u32,
) -> OptResult {
    let mut x = x0;
    let mut g = grad(&x);
    let mut s_list: Vec<Vec<f64>> = Vec::with_capacity(m);
    let mut y_list: Vec<Vec<f64>> = Vec::with_capacity(m);
    let mut converged = false;
    let mut n_iter = 0u32;
    for _ in 0..max_iter {
        n_iter += 1;
        let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
        if gnorm < tol {
            converged = true;
            break;
        }
        let k = s_list.len();
        let mut q = g.clone();
        let mut alphas = vec![0.0f64; k];
        for i in (0..k).rev() {
            let rho_i = 1.0 / dot(&y_list[i], &s_list[i]);
            let a = rho_i * dot(&s_list[i], &q);
            alphas[i] = a;
            axpy(-a, &y_list[i], &mut q);
        }
        let mut r = if k > 0 {
            let scale = dot(&s_list[k - 1], &y_list[k - 1])
                / dot(&y_list[k - 1], &y_list[k - 1]).max(1e-15);
            q.iter().map(|qi| scale * qi).collect::<Vec<_>>()
        } else {
            q.clone()
        };
        for i in 0..k {
            let rho_i = 1.0 / dot(&y_list[i], &s_list[i]);
            let beta = rho_i * dot(&y_list[i], &r);
            axpy(alphas[i] - beta, &s_list[i], &mut r);
        }
        let direction: Vec<f64> = r.iter().map(|ri| -*ri).collect();
        let f0 = f(&x);
        let gd = dot(&g, &direction);
        let alpha = backtracking_line_search(&f, &x, &direction, f0, gd, 1.0, 0.5, 1e-4);
        let s: Vec<f64> = direction.iter().map(|di| alpha * di).collect();
        let x_new: Vec<f64> = x.iter().zip(s.iter()).map(|(xi, si)| xi + si).collect();
        let g_new = grad(&x_new);
        let y: Vec<f64> = g_new
            .iter()
            .zip(g.iter())
            .map(|(gni, gi)| gni - gi)
            .collect();
        let sy = dot(&s, &y);
        if sy > 1e-15 {
            if s_list.len() == m {
                s_list.remove(0);
                y_list.remove(0);
            }
            s_list.push(s);
            y_list.push(y);
        }
        x = x_new;
        g = g_new;
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
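// Usage sketch (illustrative values): L-BFGS on a mildly ill-conditioned
// quadratic f(x) = x0ÂČ + 10 x1ÂČ with gradient (2 x0, 20 x1).
#[cfg(test)]
mod lbfgs_example {
    use super::*;

    #[test]
    fn quadratic_converges() {
        let res = lbfgs(
            |x| x[0] * x[0] + 10.0 * x[1] * x[1],
            |x| vec![2.0 * x[0], 20.0 * x[1]],
            vec![5.0, -3.0],
            5,    // history size m
            1e-6, // tol
            200,  // max_iter
        );
        assert!(res.converged);
    }
}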
/// Nelder-Mead simplex method (derivative-free).
pub fn nelder_mead(
    f: impl Fn(&[f64]) -> f64,
    x0: Vec<f64>,
    step: f64,
    tol: f64,
    max_iter: u32,
) -> OptResult {
    let n = x0.len();
    let mut simplex: Vec<Vec<f64>> = Vec::with_capacity(n + 1);
    simplex.push(x0.clone());
    for i in 0..n {
        let mut v = x0.clone();
        v[i] += step;
        simplex.push(v);
    }
    let mut fvals: Vec<f64> = simplex.iter().map(|v| f(v)).collect();
    let mut n_iter = 0u32;
    let mut converged = false;
    let alpha = 1.0_f64;
    let gamma = 2.0_f64;
    let rho = 0.5_f64;
    let sigma = 0.5_f64;
    for _ in 0..max_iter {
        n_iter += 1;
        let mut order: Vec<usize> = (0..=n).collect();
        order.sort_by(|&a, &b| {
            fvals[a]
                .partial_cmp(&fvals[b])
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        simplex = order.iter().map(|&i| simplex[i].clone()).collect();
        fvals = order.iter().map(|&i| fvals[i]).collect();
        let fmean = fvals.iter().sum::<f64>() / (n + 1) as f64;
        let fstd: f64 =
            (fvals.iter().map(|fi| (fi - fmean).powi(2)).sum::<f64>() / (n + 1) as f64).sqrt();
        if fstd < tol {
            converged = true;
            break;
        }
        let mut centroid = vec![0.0f64; n];
        for v in &simplex[..n] {
            for (c, vi) in centroid.iter_mut().zip(v.iter()) {
                *c += vi;
            }
        }
        for c in centroid.iter_mut() {
            *c /= n as f64;
        }
        let xr: Vec<f64> = centroid
            .iter()
            .zip(simplex[n].iter())
            .map(|(c, w)| c + alpha * (c - w))
            .collect();
        let fr = f(&xr);
        if fr < fvals[0] {
            let xe: Vec<f64> = centroid
                .iter()
                .zip(xr.iter())
                .map(|(c, r)| c + gamma * (r - c))
                .collect();
            let fe = f(&xe);
            if fe < fr {
                simplex[n] = xe;
                fvals[n] = fe;
            } else {
                simplex[n] = xr;
                fvals[n] = fr;
            }
        } else if fr < fvals[n - 1] {
            simplex[n] = xr;
            fvals[n] = fr;
        } else {
            let xc: Vec<f64> = centroid
                .iter()
                .zip(simplex[n].iter())
                .map(|(c, w)| c + rho * (w - c))
                .collect();
            let fc = f(&xc);
            if fc < fvals[n] {
                simplex[n] = xc;
                fvals[n] = fc;
            } else {
                let x0b = simplex[0].clone();
                for i in 1..=n {
                    simplex[i] = x0b
                        .iter()
                        .zip(simplex[i].iter())
                        .map(|(b, v)| b + sigma * (v - b))
                        .collect();
                    fvals[i] = f(&simplex[i]);
                }
            }
        }
    }
    let f_val = fvals[0];
    OptResult {
        x: simplex[0].clone(),
        f_val,
        n_iter,
        converged,
    }
}
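// Usage sketch (illustrative values): Nelder-Mead on the 2-D sphere function.
// Convergence is declared when the standard deviation of the simplex values
// drops below `tol`, so `f_val` can sit slightly above the true minimum.
#[cfg(test)]
mod nelder_mead_example {
    use super::*;

    #[test]
    fn sphere_converges() {
        let res = nelder_mead(
            |x| x.iter().map(|v| v * v).sum(),
            vec![2.0, 1.5],
            0.5,   // initial simplex step
            1e-10, // tol on simplex spread
            2_000, // max_iter
        );
        assert!(res.converged);
        assert!(res.f_val < 1e-6);
    }
}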
/// Golden-section search for a unimodal minimum on `[a, b]`.
///
/// Returns `(x_min, f_min)`.
pub fn golden_section(f: impl Fn(f64) -> f64, a: f64, b: f64, tol: f64) -> (f64, f64) {
    let phi = (5.0_f64.sqrt() - 1.0) / 2.0;
    let mut lo = a;
    let mut hi = b;
    let mut c = hi - phi * (hi - lo);
    let mut d = lo + phi * (hi - lo);
    let mut fc = f(c);
    let mut fd = f(d);
    while (hi - lo).abs() > tol {
        if fc < fd {
            hi = d;
            d = c;
            fd = fc;
            c = hi - phi * (hi - lo);
            fc = f(c);
        } else {
            lo = c;
            c = d;
            fc = fd;
            d = lo + phi * (hi - lo);
            fd = f(d);
        }
    }
    let x_min = (lo + hi) / 2.0;
    (x_min, f(x_min))
}
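// Usage sketch: golden-section search on the unimodal (x - 2)ÂČ over [0, 5].
#[cfg(test)]
mod golden_section_example {
    use super::*;

    #[test]
    fn finds_parabola_minimum() {
        let (x_min, f_min) = golden_section(|x| (x - 2.0).powi(2), 0.0, 5.0, 1e-8);
        assert!((x_min - 2.0).abs() < 1e-6);
        assert!(f_min < 1e-12);
    }
}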
/// Brent's method for 1-D minimization given a bracketing triple `(a, b, c)`
/// where `b` is between `a` and `c` and `f(b) < f(a)`, `f(b) < f(c)`.
///
/// Returns `(x_min, f_min)`.
pub fn brent(f: impl Fn(f64) -> f64, a: f64, b: f64, c: f64, tol: f64) -> (f64, f64) {
    let gold = 0.381_966_011_250_105;
    let tiny = 1e-10;
    let mut x = b;
    let mut w = b;
    let mut v = b;
    let mut fx = f(x);
    let mut fw = fx;
    let mut fv = fx;
    let mut lo = a.min(c);
    let mut hi = a.max(c);
    let mut d = 0.0f64;
    let mut e = 0.0f64;
    for _ in 0..500 {
        let xm = 0.5 * (lo + hi);
        let tol1 = tol * x.abs() + tiny;
        let tol2 = 2.0 * tol1;
        if (x - xm).abs() <= tol2 - 0.5 * (hi - lo) {
            return (x, fx);
        }
        let mut use_gold = true;
        if e.abs() > tol1 {
            let r = (x - w) * (fx - fv);
            let q_val = (x - v) * (fx - fw);
            let p = (x - v) * q_val - (x - w) * r;
            let mut q2 = 2.0 * (q_val - r);
            let p = if q2 > 0.0 { -p } else { p };
            q2 = q2.abs();
            if p.abs() < (0.5 * q2 * e).abs() && p > q2 * (lo - x) && p < q2 * (hi - x) {
                e = d;
                d = p / q2;
                use_gold = false;
                let u = x + d;
                if (u - lo) < tol2 || (hi - u) < tol2 {
                    d = if xm >= x { tol1 } else { -tol1 };
                }
            }
        }
        if use_gold {
            e = if x >= xm { lo - x } else { hi - x };
            d = gold * e;
        }
        let u = x + if d.abs() >= tol1 {
            d
        } else if d >= 0.0 {
            tol1
        } else {
            -tol1
        };
        let fu = f(u);
        if fu <= fx {
            if u < x {
                hi = x;
            } else {
                lo = x;
            }
            v = w;
            fv = fw;
            w = x;
            fw = fx;
            x = u;
            fx = fu;
        } else {
            if u < x {
                lo = u;
            } else {
                hi = u;
            }
            if fu <= fw || (w - x).abs() < tiny {
                v = w;
                fv = fw;
                w = u;
                fw = fu;
            } else if fu <= fv || (v - x).abs() < tiny || (v - w).abs() < tiny {
                v = u;
                fv = fu;
            }
        }
    }
    (x, fx)
}
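// Usage sketch: Brent's method on (x - 1)ÂČ with the bracketing triple
// (0.0, 0.5, 2.0), which satisfies f(b) < f(a) and f(b) < f(c).
#[cfg(test)]
mod brent_example {
    use super::*;

    #[test]
    fn finds_parabola_minimum() {
        let (x_min, f_min) = brent(|x| (x - 1.0).powi(2), 0.0, 0.5, 2.0, 1e-8);
        assert!((x_min - 1.0).abs() < 1e-6);
        assert!(f_min < 1e-12);
    }
}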
/// Bisection method for root finding on `[a, b]`.
///
/// Requires `f(a)` and `f(b)` to have opposite signs.
/// Returns `Some(root)` when converged, or `None` if the sign condition is not
/// satisfied.
pub fn bisection(f: impl Fn(f64) -> f64, a: f64, b: f64, tol: f64) -> Option<f64> {
    let mut lo = a;
    let mut hi = b;
    let flo = f(lo);
    let fhi = f(hi);
    if flo * fhi > 0.0 {
        return None;
    }
    let mut flo_cur = flo;
    for _ in 0..1000 {
        if (hi - lo).abs() < tol {
            break;
        }
        let mid = (lo + hi) / 2.0;
        let fmid = f(mid);
        if fmid * flo_cur <= 0.0 {
            hi = mid;
        } else {
            lo = mid;
            flo_cur = fmid;
        }
    }
    Some((lo + hi) / 2.0)
}
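// Usage sketch: bisection root-finding for xÂČ - 2 on [0, 2]; the root is √2.
#[cfg(test)]
mod bisection_example {
    use super::*;

    #[test]
    fn finds_sqrt_two() {
        let root = bisection(|x| x * x - 2.0, 0.0, 2.0, 1e-10).expect("sign change");
        assert!((root - 2.0_f64.sqrt()).abs() < 1e-8);
        // No sign change on [2, 3] for this f, so None is returned.
        assert!(bisection(|x| x * x - 2.0, 2.0, 3.0, 1e-10).is_none());
    }
}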
/// Central-difference numerical gradient.
pub fn numerical_gradient(f: impl Fn(&[f64]) -> f64, x: &[f64], h: f64) -> Vec<f64> {
    let n = x.len();
    let mut grad = vec![0.0f64; n];
    let mut xp = x.to_vec();
    let mut xm = x.to_vec();
    for i in 0..n {
        xp[i] += h;
        xm[i] -= h;
        grad[i] = (f(&xp) - f(&xm)) / (2.0 * h);
        xp[i] = x[i];
        xm[i] = x[i];
    }
    grad
}
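// Usage sketch: central differences on f(x) = x0ÂČ + 3 x1, whose exact
// gradient at (1, 2) is (2, 3). The O(hÂČ) error is far below the tolerance.
#[cfg(test)]
mod numerical_gradient_example {
    use super::*;

    #[test]
    fn matches_analytic_gradient() {
        let g = numerical_gradient(|x| x[0] * x[0] + 3.0 * x[1], &[1.0, 2.0], 1e-5);
        assert!((g[0] - 2.0).abs() < 1e-6);
        assert!((g[1] - 3.0).abs() < 1e-6);
    }
}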
/// Finite-difference Hessian matrix (central differences, second order).
pub fn numerical_hessian(f: impl Fn(&[f64]) -> f64, x: &[f64], h: f64) -> Vec<Vec<f64>> {
    let n = x.len();
    let f0 = f(x);
    let mut hess = vec![vec![0.0f64; n]; n];
    for i in 0..n {
        for j in 0..=i {
            if i == j {
                let mut xp = x.to_vec();
                let mut xm = x.to_vec();
                xp[i] += h;
                xm[i] -= h;
                hess[i][j] = (f(&xp) - 2.0 * f0 + f(&xm)) / (h * h);
            } else {
                let mut xpp = x.to_vec();
                let mut xpm = x.to_vec();
                let mut xmp = x.to_vec();
                let mut xmm = x.to_vec();
                xpp[i] += h;
                xpp[j] += h;
                xpm[i] += h;
                xpm[j] -= h;
                xmp[i] -= h;
                xmp[j] += h;
                xmm[i] -= h;
                xmm[j] -= h;
                let val = (f(&xpp) - f(&xpm) - f(&xmp) + f(&xmm)) / (4.0 * h * h);
                hess[i][j] = val;
                hess[j][i] = val;
            }
        }
    }
    hess
}
/// Backtracking line search satisfying the Armijo (sufficient-decrease)
/// condition.
///
/// Returns the accepted step length `alpha`.
#[allow(clippy::too_many_arguments)]
pub fn backtracking_line_search(
    f: impl Fn(&[f64]) -> f64,
    x: &[f64],
    direction: &[f64],
    f0: f64,
    grad_dot: f64,
    alpha0: f64,
    rho: f64,
    c: f64,
) -> f64 {
    let mut alpha = alpha0;
    for _ in 0..100 {
        let x_new: Vec<f64> = x
            .iter()
            .zip(direction.iter())
            .map(|(xi, di)| xi + alpha * di)
            .collect();
        if f(&x_new) <= f0 + c * alpha * grad_dot {
            break;
        }
        alpha *= rho;
    }
    alpha
}
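// Usage sketch: Armijo backtracking on f(x) = xÂČ at x = 2 along the steepest
// descent direction d = -∇f = (-4). The full step overshoots, so one halving
// is expected (alpha = 0.5 lands exactly on the minimum).
#[cfg(test)]
mod backtracking_example {
    use super::*;

    #[test]
    fn halves_once_on_overshoot() {
        let f = |x: &[f64]| x[0] * x[0];
        let alpha = backtracking_line_search(
            f,
            &[2.0],
            &[-4.0],
            4.0,   // f0 = f(2)
            -16.0, // ∇f·d = 4 * (-4)
            1.0,   // alpha0
            0.5,   // rho
            1e-4,  // c
        );
        assert!((alpha - 0.5).abs() < 1e-12);
    }
}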
/// Box-constrained minimization via projected gradient descent.
///
/// Projects iterates onto `[lb[i], ub[i]]` after each gradient step.
pub fn constrained_min_box(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x0: Vec<f64>,
    lb: &[f64],
    ub: &[f64],
    tol: f64,
    max_iter: u32,
) -> OptResult {
    let n = x0.len();
    let mut x = x0;
    for i in 0..n {
        x[i] = x[i].clamp(lb[i], ub[i]);
    }
    let mut converged = false;
    let mut n_iter = 0u32;
    let mut lr = 1.0f64;
    for _ in 0..max_iter {
        n_iter += 1;
        let g = grad(&x);
        let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
        if gnorm < tol {
            converged = true;
            break;
        }
        let f0 = f(&x);
        let grad_dot = -g.iter().map(|gi| gi * gi).sum::<f64>();
        let dir: Vec<f64> = g.iter().map(|gi| -*gi).collect();
        let mut accepted = false;
        for _ in 0..30 {
            let mut x_try: Vec<f64> = x
                .iter()
                .zip(dir.iter())
                .map(|(xi, di)| xi + lr * di)
                .collect();
            for i in 0..n {
                x_try[i] = x_try[i].clamp(lb[i], ub[i]);
            }
            if f(&x_try) <= f0 + 1e-4 * lr * grad_dot {
                x = x_try;
                lr *= 1.2;
                lr = lr.min(1.0);
                accepted = true;
                break;
            }
            lr *= 0.5;
        }
        if !accepted {
            break;
        }
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
/// Gradient descent with heavy-ball (Polyak) momentum.
///
/// Update rule: `v ← momentum * v - lr * grad(x)`, `x ← x + v`.
pub fn gradient_descent_momentum(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x0: Vec<f64>,
    lr: f64,
    momentum: f64,
    tol: f64,
    max_iter: u32,
) -> OptResult {
    let n = x0.len();
    let mut x = x0;
    let mut vel = vec![0.0f64; n];
    let mut converged = false;
    let mut n_iter = 0u32;
    for _ in 0..max_iter {
        n_iter += 1;
        let g = grad(&x);
        let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
        if gnorm < tol {
            converged = true;
            break;
        }
        for i in 0..n {
            vel[i] = momentum * vel[i] - lr * g[i];
            x[i] += vel[i];
        }
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
/// Simulated annealing for global minimisation.
///
/// Uses a random-walk proposal `x_new = x + u`, where each component of `u`
/// is drawn uniformly from `[-step_size, step_size]`.  The Metropolis
/// acceptance criterion allows uphill moves with probability `exp(-ΔE / T)`.
///
/// Temperature follows a geometric schedule: `T ← T * cooling_rate`.
pub fn simulated_annealing(
    f: impl Fn(&[f64]) -> f64,
    x0: Vec<f64>,
    initial_temp: f64,
    cooling_rate: f64,
    step_size: f64,
    max_iter: u32,
) -> OptResult {
    let mut x = x0;
    let mut f_cur = f(&x);
    let mut best_x = x.clone();
    let mut best_f = f_cur;
    let mut temp = initial_temp;
    let mut rng = rand::rng();
    for _ in 0..max_iter {
        let x_new: Vec<f64> = x
            .iter()
            .map(|xi| xi + (rng.random::<f64>() - 0.5) * 2.0 * step_size)
            .collect();
        let f_new = f(&x_new);
        let delta = f_new - f_cur;
        if delta < 0.0 || (temp > 1e-300 && rng.random::<f64>() < (-delta / temp).exp()) {
            x = x_new;
            f_cur = f_new;
            if f_cur < best_f {
                best_f = f_cur;
                best_x = x.clone();
            }
        }
        temp *= cooling_rate;
    }
    OptResult {
        x: best_x,
        f_val: best_f,
        n_iter: max_iter,
        converged: temp < 1e-10,
    }
}
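// Usage sketch (the run is stochastic, so the assertion is deliberately
// weak): the tracked best value can never exceed the starting value, because
// `best_f` is initialised to f(x0) and only ever decreases.
#[cfg(test)]
mod simulated_annealing_example {
    use super::*;

    #[test]
    fn best_never_worse_than_start() {
        let f = |x: &[f64]| x.iter().map(|v| v * v).sum::<f64>();
        let x0 = vec![3.0, -2.0];
        let f_start = f(&x0);
        let res = simulated_annealing(f, x0, 10.0, 0.99, 0.5, 5_000);
        assert!(res.f_val <= f_start);
    }
}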
/// Particle Swarm Optimization (PSO).
///
/// Minimises `f` using `n_particles` agents.  Each agent is initialised
/// uniformly in `[lb[i], ub[i]]`.
///
/// * `w` — inertia weight (0.5–0.9 typical)
/// * `c1` — cognitive coefficient (≈ 2.0)
/// * `c2` — social coefficient (≈ 2.0)
#[allow(clippy::too_many_arguments)]
pub fn particle_swarm(
    f: impl Fn(&[f64]) -> f64,
    lb: &[f64],
    ub: &[f64],
    n_particles: usize,
    w: f64,
    c1: f64,
    c2: f64,
    max_iter: u32,
) -> OptResult {
    let n = lb.len();
    let mut rng = rand::rng();
    let mut particles: Vec<Particle> = (0..n_particles)
        .map(|_| {
            let pos: Vec<f64> = (0..n)
                .map(|i| lb[i] + rng.random::<f64>() * (ub[i] - lb[i]))
                .collect();
            let vel: Vec<f64> = (0..n)
                .map(|i| (rng.random::<f64>() - 0.5) * (ub[i] - lb[i]) * 0.1)
                .collect();
            let val = f(&pos);
            Particle {
                best_pos: pos.clone(),
                best_val: val,
                pos,
                vel,
            }
        })
        .collect();
    let init_best = particles
        .iter()
        .min_by(|a, b| {
            a.best_val
                .partial_cmp(&b.best_val)
                .unwrap_or(std::cmp::Ordering::Equal)
        })
        .expect("particles is non-empty");
    let mut global_best_pos = init_best.best_pos.clone();
    let mut global_best_val = init_best.best_val;
    for _ in 0..max_iter {
        for p in particles.iter_mut() {
            for i in 0..n {
                let r1: f64 = rng.random();
                let r2: f64 = rng.random();
                p.vel[i] = w * p.vel[i]
                    + c1 * r1 * (p.best_pos[i] - p.pos[i])
                    + c2 * r2 * (global_best_pos[i] - p.pos[i]);
                p.pos[i] = (p.pos[i] + p.vel[i]).clamp(lb[i], ub[i]);
            }
            let val = f(&p.pos);
            if val < p.best_val {
                p.best_val = val;
                p.best_pos = p.pos.clone();
                if val < global_best_val {
                    global_best_val = val;
                    global_best_pos = p.pos.clone();
                }
            }
        }
    }
    OptResult {
        x: global_best_pos,
        f_val: global_best_val,
        n_iter: max_iter,
        converged: true,
    }
}
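// Usage sketch (stochastic; loose tolerance): PSO on the 2-D sphere function
// over the box [-5, 5]ÂČ with typical coefficient choices.
#[cfg(test)]
mod particle_swarm_example {
    use super::*;

    #[test]
    fn sphere_near_origin() {
        let res = particle_swarm(
            |x| x.iter().map(|v| v * v).sum(),
            &[-5.0, -5.0],
            &[5.0, 5.0],
            30,  // n_particles
            0.7, // w
            2.0, // c1
            2.0, // c2
            200, // max_iter
        );
        assert!(res.f_val < 1e-1);
    }
}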
/// Genetic algorithm for real-valued continuous minimisation.
///
/// Represents each individual as a `Vec<f64>` in `[lb[i], ub[i]]`.
/// Uses tournament selection, arithmetic crossover, and uniform mutation.
#[allow(clippy::too_many_arguments)]
pub fn genetic_algorithm(
    f: impl Fn(&[f64]) -> f64,
    lb: &[f64],
    ub: &[f64],
    pop_size: usize,
    crossover_rate: f64,
    mutation_rate: f64,
    mutation_scale: f64,
    max_generations: u32,
) -> OptResult {
    let n = lb.len();
    let mut rng = rand::rng();
    let mut pop: Vec<Vec<f64>> = (0..pop_size)
        .map(|_| {
            (0..n)
                .map(|i| lb[i] + rng.random::<f64>() * (ub[i] - lb[i]))
                .collect()
        })
        .collect();
    let mut fitness: Vec<f64> = pop.iter().map(|x| f(x)).collect();
    for _ in 0..max_generations {
        let mut new_pop: Vec<Vec<f64>> = Vec::with_capacity(pop_size);
        for _ in 0..pop_size {
            // Binary tournament selection for each parent.
            let a_idx = {
                let i = rng.random_range(0..pop_size);
                let j = rng.random_range(0..pop_size);
                if fitness[i] < fitness[j] { i } else { j }
            };
            let b_idx = {
                let i = rng.random_range(0..pop_size);
                let j = rng.random_range(0..pop_size);
                if fitness[i] < fitness[j] { i } else { j }
            };
            let mut child = pop[a_idx].clone();
            if rng.random::<f64>() < crossover_rate {
                let alpha: f64 = rng.random();
                child = child
                    .iter()
                    .zip(pop[b_idx].iter())
                    .map(|(&a, &b)| alpha * a + (1.0 - alpha) * b)
                    .collect();
            }
            for i in 0..n {
                if rng.random::<f64>() < mutation_rate {
                    let delta = (rng.random::<f64>() - 0.5) * 2.0 * mutation_scale;
                    child[i] = (child[i] + delta).clamp(lb[i], ub[i]);
                }
            }
            new_pop.push(child);
        }
        pop = new_pop;
        fitness = pop.iter().map(|x| f(x)).collect();
    }
    let best_idx = fitness
        .iter()
        .enumerate()
        .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
        .map(|(i, _)| i)
        .unwrap_or(0);
    OptResult {
        x: pop[best_idx].clone(),
        f_val: fitness[best_idx],
        n_iter: max_generations,
        converged: true,
    }
}
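// Usage sketch (stochastic; very loose tolerance): GA on the 2-D sphere
// function. With 50 individuals over 100 generations the best value is
// essentially always well below 1.
#[cfg(test)]
mod genetic_algorithm_example {
    use super::*;

    #[test]
    fn sphere_improves() {
        let res = genetic_algorithm(
            |x| x.iter().map(|v| v * v).sum(),
            &[-5.0, -5.0],
            &[5.0, 5.0],
            50,  // pop_size
            0.8, // crossover_rate
            0.1, // mutation_rate
            0.5, // mutation_scale
            100, // max_generations
        );
        assert!(res.f_val < 1.0);
    }
}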
/// L-BFGS-B: box-constrained variant of L-BFGS.
///
/// Enforces `lb[i] ≀ x[i] ≀ ub[i]` by projecting the search direction onto the
/// feasible set after each step (simple projected L-BFGS).
#[allow(clippy::too_many_arguments)]
pub fn lbfgsb(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x0: Vec<f64>,
    lb: &[f64],
    ub: &[f64],
    m: usize,
    tol: f64,
    max_iter: u32,
) -> OptResult {
    let mut x: Vec<f64> = x0
        .iter()
        .enumerate()
        .map(|(i, &xi)| xi.clamp(lb[i], ub[i]))
        .collect();
    let mut g = grad(&x);
    let mut s_list: Vec<Vec<f64>> = Vec::with_capacity(m);
    let mut y_list: Vec<Vec<f64>> = Vec::with_capacity(m);
    let mut converged = false;
    let mut n_iter = 0u32;
    for _ in 0..max_iter {
        n_iter += 1;
        let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
        if gnorm < tol {
            converged = true;
            break;
        }
        let k = s_list.len();
        let mut q = g.clone();
        let mut alphas = vec![0.0f64; k];
        for i in (0..k).rev() {
            let sy = dot(&y_list[i], &s_list[i]);
            if sy.abs() < 1e-15 {
                continue;
            }
            let rho_i = 1.0 / sy;
            let a = rho_i * dot(&s_list[i], &q);
            alphas[i] = a;
            axpy(-a, &y_list[i], &mut q);
        }
        let mut r = if k > 0 {
            let scale = dot(&s_list[k - 1], &y_list[k - 1])
                / dot(&y_list[k - 1], &y_list[k - 1]).max(1e-15);
            q.iter().map(|qi| scale * qi).collect::<Vec<_>>()
        } else {
            q.clone()
        };
        for i in 0..k {
            let sy = dot(&y_list[i], &s_list[i]);
            if sy.abs() < 1e-15 {
                continue;
            }
            let rho_i = 1.0 / sy;
            let beta = rho_i * dot(&y_list[i], &r);
            axpy(alphas[i] - beta, &s_list[i], &mut r);
        }
        let direction: Vec<f64> = r.iter().map(|ri| -*ri).collect();
        let proj_dir: Vec<f64> = direction
            .iter()
            .enumerate()
            .map(|(i, &di)| {
                if (x[i] <= lb[i] && di < 0.0) || (x[i] >= ub[i] && di > 0.0) {
                    0.0
                } else {
                    di
                }
            })
            .collect();
        let f0 = f(&x);
        let gd = dot(&g, &proj_dir);
        let alpha = backtracking_line_search(&f, &x, &proj_dir, f0, gd, 1.0, 0.5, 1e-4);
        let s: Vec<f64> = proj_dir.iter().map(|di| alpha * di).collect();
        let x_new: Vec<f64> = x
            .iter()
            .zip(s.iter())
            .enumerate()
            .map(|(i, (xi, si))| (xi + si).clamp(lb[i], ub[i]))
            .collect();
        let g_new = grad(&x_new);
        let y: Vec<f64> = g_new
            .iter()
            .zip(g.iter())
            .map(|(gni, gi)| gni - gi)
            .collect();
        let sy = dot(&s, &y);
        if sy > 1e-15 {
            if s_list.len() == m {
                s_list.remove(0);
                y_list.remove(0);
            }
            s_list.push(s);
            y_list.push(y);
        }
        x = x_new;
        g = g_new;
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
/// Nonlinear conjugate gradient minimization using the Fletcher-Reeves update.
///
/// Uses a backtracking line search at each step. The direction is restarted
/// every `n` iterations.
pub fn conjugate_gradient_minimize<F, G>(
    f: F,
    grad: G,
    x0: Vec<f64>,
    max_iter: usize,
    tol: f64,
) -> Vec<f64>
where
    F: Fn(&[f64]) -> f64,
    G: Fn(&[f64]) -> Vec<f64>,
{
    let n = x0.len();
    let mut x = x0;
    let mut g = grad(&x);
    let mut d: Vec<f64> = g.iter().map(|gi| -*gi).collect();
    for iter in 0..max_iter {
        let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
        if gnorm < tol {
            break;
        }
        let f0 = f(&x);
        let gd = g.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum::<f64>();
        if gd >= 0.0 {
            d = g.iter().map(|gi| -*gi).collect();
        }
        let gd = g.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum::<f64>();
        let alpha = backtracking_line_search(&f, &x, &d, f0, gd, 1.0, 0.5, 1e-4);
        let x_new: Vec<f64> = (0..n).map(|i| x[i] + alpha * d[i]).collect();
        let g_new = grad(&x_new);
        let g_new_sq: f64 = g_new.iter().map(|v| v * v).sum();
        let g_sq: f64 = g.iter().map(|v| v * v).sum();
        let beta = if g_sq < 1e-30 || (iter + 1) % n == 0 {
            0.0
        } else {
            g_new_sq / g_sq
        };
        d = (0..n).map(|i| -g_new[i] + beta * d[i]).collect();
        x = x_new;
        g = g_new;
    }
    x
}
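// Usage sketch: Fletcher-Reeves CG on the quadratic f(x) = x0ÂČ + 10 x1ÂČ.
// Note this function returns only the final point, not an OptResult.
#[cfg(test)]
mod conjugate_gradient_example {
    use super::*;

    #[test]
    fn quadratic_converges() {
        let x = conjugate_gradient_minimize(
            |x| x[0] * x[0] + 10.0 * x[1] * x[1],
            |x| vec![2.0 * x[0], 20.0 * x[1]],
            vec![4.0, -1.0],
            200,  // max_iter
            1e-6, // tol
        );
        assert!(x.iter().all(|v| v.abs() < 1e-3));
    }
}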
/// Strong Wolfe conditions line search.
///
/// Finds a step length `α` satisfying:
/// 1. Sufficient decrease (Armijo): `f(x + α d) ≀ f(x) + c1 * α * ∇f(x)·d`
/// 2. Curvature: `|∇f(x + α d)·d| ≀ c2 * |∇f(x)·d|`
///
/// Returns the accepted step length.
#[allow(clippy::too_many_arguments)]
pub fn wolfe_line_search(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x: &[f64],
    direction: &[f64],
    f0: f64,
    g0_dot_d: f64,
    c1: f64,
    c2: f64,
    max_iter: usize,
) -> f64 {
    let n = x.len();
    let mut alpha = 1.0f64;
    let mut alpha_lo = 0.0f64;
    let mut alpha_hi = f64::INFINITY;
    for _ in 0..max_iter {
        let x_new: Vec<f64> = (0..n).map(|i| x[i] + alpha * direction[i]).collect();
        let f_new = f(&x_new);
        if f_new > f0 + c1 * alpha * g0_dot_d {
            alpha_hi = alpha;
            alpha = 0.5 * (alpha_lo + alpha_hi);
            continue;
        }
        let g_new = grad(&x_new);
        let g_new_dot_d: f64 = g_new.iter().zip(direction.iter()).map(|(g, d)| g * d).sum();
        if g_new_dot_d.abs() <= c2 * g0_dot_d.abs() {
            return alpha;
        }
        if g_new_dot_d > 0.0 {
            alpha_hi = alpha;
            alpha = 0.5 * (alpha_lo + alpha_hi);
        } else {
            alpha_lo = alpha;
            if alpha_hi.is_infinite() {
                alpha *= 2.0;
            } else {
                alpha = 0.5 * (alpha_lo + alpha_hi);
            }
        }
    }
    alpha
}
/// Augmented Lagrangian method for equality-constrained minimisation.
///
/// Minimises `f(x)` subject to `c_i(x) = 0` for each constraint in `constraints`.
///
/// Strategy: alternates between unconstrained minimisation of the augmented
/// Lagrangian `L_ρ(x,λ) = f(x) + ÎŁ λ_i c_i(x) + (ρ/2) ÎŁ c_i(x)ÂČ` and a
/// dual update `λ_i ← λ_i + ρ * c_i(x)`.
///
/// Each unconstrained sub-problem is solved with a few steps of gradient
/// descent (a full sub-solver can be plugged in via `inner_iter`).
#[allow(clippy::too_many_arguments)]
pub fn augmented_lagrangian<F, G, C, DC>(
    f: F,
    grad_f: G,
    constraints: &[C],
    grad_constraints: &[DC],
    x0: Vec<f64>,
    rho0: f64,
    outer_iter: usize,
    inner_iter: usize,
    tol: f64,
) -> OptResult
where
    F: Fn(&[f64]) -> f64,
    G: Fn(&[f64]) -> Vec<f64>,
    C: Fn(&[f64]) -> f64,
    DC: Fn(&[f64]) -> Vec<f64>,
{
    let n = x0.len();
    let m = constraints.len();
    let mut x = x0;
    let mut lam = vec![0.0f64; m];
    let mut rho = rho0;
    let mut converged = false;
    let mut total_iter = 0u32;
    for _outer in 0..outer_iter {
        for _inner in 0..inner_iter {
            total_iter += 1;
            let mut g = grad_f(&x);
            for (i, (ci, dci)) in constraints.iter().zip(grad_constraints.iter()).enumerate() {
                let cv = ci(&x);
                let dv = dci(&x);
                let coeff = lam[i] + rho * cv;
                for j in 0..n {
                    g[j] += coeff * dv[j];
                }
            }
            let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
            if gnorm < tol * 0.1 {
                break;
            }
            let mut lr = 0.1;
            let aug_f = |xp: &[f64]| {
                let mut val = f(xp);
                for (i, ci) in constraints.iter().enumerate() {
                    let cv = ci(xp);
                    val += lam[i] * cv + 0.5 * rho * cv * cv;
                }
                val
            };
            let f0 = aug_f(&x);
            for _ in 0..20 {
                let x_try: Vec<f64> = (0..n).map(|j| x[j] - lr * g[j]).collect();
                if aug_f(&x_try) < f0 {
                    x = x_try;
                    break;
                }
                lr *= 0.5;
            }
        }
        let mut max_viol = 0.0f64;
        for (i, ci) in constraints.iter().enumerate() {
            let cv = ci(&x);
            lam[i] += rho * cv;
            max_viol = max_viol.max(cv.abs());
        }
        if max_viol < tol {
            converged = true;
            break;
        }
        rho *= 2.0;
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter: total_iter,
        converged,
    }
}
/// Cyclic coordinate descent (derivative-free).
///
/// Iterates over each coordinate in turn, performing a golden-section search
/// along that axis.  Useful when gradients are unavailable.
pub fn coordinate_descent(
    f: impl Fn(&[f64]) -> f64,
    x0: Vec<f64>,
    step: f64,
    tol: f64,
    max_iter: usize,
) -> OptResult {
    let n = x0.len();
    let mut x = x0;
    let mut converged = false;
    let mut n_iter = 0u32;
    for _iter in 0..max_iter {
        n_iter += 1;
        let x_old = x.clone();
        for d in 0..n {
            let (best_delta, _) = golden_section(
                |delta| {
                    let mut xp = x.clone();
                    xp[d] += delta;
                    f(&xp)
                },
                -step,
                step,
                tol * 1e-2,
            );
            x[d] += best_delta;
        }
        let diff: f64 = x
            .iter()
            .zip(x_old.iter())
            .map(|(a, b)| (a - b).powi(2))
            .sum::<f64>()
            .sqrt();
        if diff < tol {
            converged = true;
            break;
        }
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
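// Usage sketch: coordinate descent on the separable sphere function. Each
// golden-section line search covers [-step, step], which here contains the
// per-coordinate minimiser.
#[cfg(test)]
mod coordinate_descent_example {
    use super::*;

    #[test]
    fn sphere_converges() {
        let res = coordinate_descent(
            |x| x.iter().map(|v| v * v).sum(),
            vec![0.8, -0.6],
            1.0,  // step (search interval half-width)
            1e-8, // tol
            100,  // max_iter
        );
        assert!(res.converged);
        assert!(res.f_val < 1e-10);
    }
}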
/// Powell's method (derivative-free multidimensional minimisation).
///
/// Maintains a set of conjugate directions and performs line searches
/// along each.  Resets directions periodically to avoid linear dependence.
pub fn powell(f: impl Fn(&[f64]) -> f64, x0: Vec<f64>, tol: f64, max_iter: usize) -> OptResult {
    let n = x0.len();
    let mut x = x0;
    let mut dirs: Vec<Vec<f64>> = (0..n)
        .map(|i| {
            let mut d = vec![0.0; n];
            d[i] = 1.0;
            d
        })
        .collect();
    let mut converged = false;
    let mut n_iter = 0u32;
    for _iter in 0..max_iter {
        n_iter += 1;
        let x_start = x.clone();
        let mut max_decrease = 0.0f64;
        let mut max_dir_idx = 0;
        for (k, dir) in dirs.iter().enumerate() {
            let f_before = f(&x);
            let (alpha, _) = golden_section(
                |a| {
                    let xp: Vec<f64> = x
                        .iter()
                        .zip(dir.iter())
                        .map(|(xi, di)| xi + a * di)
                        .collect();
                    f(&xp)
                },
                -10.0,
                10.0,
                tol * 0.01,
            );
            let xp: Vec<f64> = x
                .iter()
                .zip(dir.iter())
                .map(|(xi, di)| xi + alpha * di)
                .collect();
            let decrease = f_before - f(&xp);
            if decrease > max_decrease {
                max_decrease = decrease;
                max_dir_idx = k;
            }
            x = xp;
        }
        let diff: f64 = x
            .iter()
            .zip(x_start.iter())
            .map(|(a, b)| (a - b).powi(2))
            .sum::<f64>()
            .sqrt();
        if diff < tol {
            converged = true;
            break;
        }
        let new_dir: Vec<f64> = x.iter().zip(x_start.iter()).map(|(a, b)| a - b).collect();
        let norm: f64 = new_dir.iter().map(|v| v * v).sum::<f64>().sqrt();
        if norm > 1e-15 {
            dirs[max_dir_idx] = new_dir.iter().map(|v| v / norm).collect();
        }
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
/// SGD with cosine annealing learning-rate schedule.
///
/// Useful for noisy / stochastic objective functions.  The gradient `stoch_grad`
/// may return a noisy (mini-batch) estimate.
pub fn sgd_cosine_annealing(
    f: impl Fn(&[f64]) -> f64,
    stoch_grad: impl Fn(&[f64]) -> Vec<f64>,
    x0: Vec<f64>,
    lr_max: f64,
    lr_min: f64,
    max_iter: u32,
) -> OptResult {
    let n = x0.len();
    let mut x = x0;
    let mut n_iter = 0u32;
    for t in 0..max_iter {
        n_iter = t + 1;
        let cos_factor = 0.5 * (1.0 + (std::f64::consts::PI * t as f64 / max_iter as f64).cos());
        let lr = lr_min + (lr_max - lr_min) * cos_factor;
        let g = stoch_grad(&x);
        for i in 0..n {
            x[i] -= lr * g[i];
        }
    }
    let f_val = f(&x);
    let gnorm: f64 = stoch_grad(&x).iter().map(|v| v * v).sum::<f64>().sqrt();
    OptResult {
        x,
        f_val,
        n_iter,
        converged: gnorm < 1e-4,
    }
}
/// Full-memory BFGS (Broyden-Fletcher-Goldfarb-Shanno).
///
/// Maintains an approximate Hessian inverse `H` (n×n matrix) updated with
/// each iteration.  Falls back to gradient descent when curvature information
/// is insufficient.
pub fn bfgs(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x0: Vec<f64>,
    tol: f64,
    max_iter: usize,
) -> OptResult {
    let n = x0.len();
    let mut x = x0;
    let mut h: Vec<Vec<f64>> = (0..n)
        .map(|i| {
            let mut row = vec![0.0; n];
            row[i] = 1.0;
            row
        })
        .collect();
    let mut converged = false;
    let mut n_iter = 0u32;
    let mut g = grad(&x);
    for _ in 0..max_iter {
        n_iter += 1;
        let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
        if gnorm < tol {
            converged = true;
            break;
        }
        let p: Vec<f64> = (0..n)
            .map(|i| {
                -h[i]
                    .iter()
                    .zip(g.iter())
                    .map(|(hij, gj)| hij * gj)
                    .sum::<f64>()
            })
            .collect();
        let f0 = f(&x);
        let gd: f64 = g.iter().zip(p.iter()).map(|(gi, pi)| gi * pi).sum();
        let alpha = backtracking_line_search(&f, &x, &p, f0, gd, 1.0, 0.5, 1e-4);
        let x_new: Vec<f64> = (0..n).map(|i| x[i] + alpha * p[i]).collect();
        let g_new = grad(&x_new);
        let s: Vec<f64> = (0..n).map(|i| x_new[i] - x[i]).collect();
        let y: Vec<f64> = (0..n).map(|i| g_new[i] - g[i]).collect();
        let sy: f64 = s.iter().zip(y.iter()).map(|(si, yi)| si * yi).sum();
        if sy > 1e-15 {
            let hy: Vec<f64> = (0..n)
                .map(|i| {
                    h[i].iter()
                        .zip(y.iter())
                        .map(|(hij, yj)| hij * yj)
                        .sum::<f64>()
                })
                .collect();
            let ythy: f64 = y.iter().zip(hy.iter()).map(|(yi, hyi)| yi * hyi).sum();
            // Standard BFGS inverse-Hessian update in expanded form:
            // H += ((s'y + y'Hy) / (s'y)^2) * s s' - (H y s' + s y' H) / (s'y)
            let factor = (sy + ythy) / (sy * sy);
            for i in 0..n {
                for j in 0..n {
                    h[i][j] += factor * s[i] * s[j] - (hy[i] * s[j] + s[i] * hy[j]) / sy;
                }
            }
        }
        x = x_new;
        g = g_new;
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
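// Usage sketch: full-memory BFGS on the quadratic f(x) = x0ÂČ + 5 x1ÂČ; with
// the standard inverse-Hessian update this converges in a handful of steps.
#[cfg(test)]
mod bfgs_example {
    use super::*;

    #[test]
    fn quadratic_converges() {
        let res = bfgs(
            |x| x[0] * x[0] + 5.0 * x[1] * x[1],
            |x| vec![2.0 * x[0], 10.0 * x[1]],
            vec![3.0, -2.0],
            1e-6, // tol
            100,  // max_iter
        );
        assert!(res.converged);
    }
}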
/// Clips the L2 norm of `grad` to `max_norm` (in-place).
///
/// Used in deep learning / physics-ML to stabilise training.
pub fn clip_gradient(grad: &mut [f64], max_norm: f64) {
    let norm: f64 = grad.iter().map(|v| v * v).sum::<f64>().sqrt();
    if norm > max_norm && norm > 1e-15 {
        let scale = max_norm / norm;
        for g in grad.iter_mut() {
            *g *= scale;
        }
    }
}
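// Usage sketch: a (3, 4) gradient has L2 norm 5; clipping to max_norm = 1
// rescales it to (0.6, 0.8).
#[cfg(test)]
mod clip_gradient_example {
    use super::*;

    #[test]
    fn rescales_to_max_norm() {
        let mut g = vec![3.0, 4.0];
        clip_gradient(&mut g, 1.0);
        assert!((g[0] - 0.6).abs() < 1e-12);
        assert!((g[1] - 0.8).abs() < 1e-12);
    }
}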
/// Gradient descent with gradient clipping.
pub fn gradient_descent_clipped(
    f: impl Fn(&[f64]) -> f64,
    grad: impl Fn(&[f64]) -> Vec<f64>,
    x0: Vec<f64>,
    lr: f64,
    max_norm: f64,
    tol: f64,
    max_iter: u32,
) -> OptResult {
    let mut x = x0;
    let mut converged = false;
    let mut n_iter = 0u32;
    for _ in 0..max_iter {
        n_iter += 1;
        let mut g = grad(&x);
        clip_gradient(&mut g, max_norm);
        let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
        if gnorm < tol {
            converged = true;
            break;
        }
        for (xi, gi) in x.iter_mut().zip(g.iter()) {
            *xi -= lr * gi;
        }
    }
    let f_val = f(&x);
    OptResult {
        x,
        f_val,
        n_iter,
        converged,
    }
}
/// Compute the lower-triangular Cholesky factor L of an n×n matrix stored
/// row-major.  Returns L such that L * L^T ≈ A.
///
/// Clamps a non-positive diagonal pivot to a small positive value (`1e-8`),
/// so a factor is always returned even when the matrix is not
/// positive-definite.
pub fn cholesky_lower(a: &[f64], n: usize) -> Vec<f64> {
    let mut l = vec![0.0_f64; n * n];
    for i in 0..n {
        for j in 0..=i {
            let s: f64 = (0..j).map(|k| l[i * n + k] * l[j * n + k]).sum();
            if i == j {
                let d = a[i * n + i] - s;
                l[i * n + j] = if d > 0.0 { d.sqrt() } else { 1e-8 };
            } else {
                let ljj = l[j * n + j];
                l[i * n + j] = if ljj.abs() > f64::EPSILON {
                    (a[i * n + j] - s) / ljj
                } else {
                    0.0
                };
            }
        }
    }
    l
}
/// Forward substitution: solve L * x = b for lower-triangular L (n×n, row-major).
pub fn forward_solve_lower(l: &[f64], b: &[f64], n: usize) -> Vec<f64> {
    let mut x = vec![0.0_f64; n];
    for i in 0..n {
        let s: f64 = (0..i).map(|j| l[i * n + j] * x[j]).sum();
        let lii = l[i * n + i];
        x[i] = if lii.abs() > f64::EPSILON {
            (b[i] - s) / lii
        } else {
            0.0
        };
    }
    x
}
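// Usage sketch: factor the SPD matrix A = [[4, 2], [2, 3]] (row-major) and
// solve L x = b by forward substitution. For b = (2, 1 + √2) the exact
// solution is x = (1, 1).
#[cfg(test)]
mod cholesky_example {
    use super::*;

    #[test]
    fn factors_and_solves() {
        let a = [4.0, 2.0, 2.0, 3.0];
        let l = cholesky_lower(&a, 2);
        assert!((l[0] - 2.0).abs() < 1e-12); // L[0][0] = 2
        assert!((l[2] - 1.0).abs() < 1e-12); // L[1][0] = 1
        let b = [2.0, 1.0 + 2.0_f64.sqrt()];
        let x = forward_solve_lower(&l, &b, 2);
        assert!((x[0] - 1.0).abs() < 1e-12);
        assert!((x[1] - 1.0).abs() < 1e-12);
    }
}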