numra_optim/
derivative_free.rs

1//! Derivative-free optimization methods: Nelder-Mead and Powell.
2//!
3//! Author: Moussa Leblouba
4//! Date: 9 February 2026
5//! Modified: 2 May 2026
6
7use numra_core::Scalar;
8
9use crate::error::OptimError;
10use crate::types::{IterationRecord, OptimResult, OptimStatus};
11
12// ─── Nelder-Mead Simplex ─────────────────────────────────────────────────
13
14/// Options for the Nelder-Mead simplex method.
15#[derive(Clone, Debug)]
16pub struct NelderMeadOptions<S: Scalar> {
17    pub max_iter: usize,
18    pub fatol: S,
19    pub xatol: S,
20    pub initial_simplex_scale: S,
21    pub adaptive: bool,
22    pub verbose: bool,
23}
24
25impl<S: Scalar> Default for NelderMeadOptions<S> {
26    fn default() -> Self {
27        Self {
28            max_iter: 10_000,
29            fatol: S::from_f64(1e-8),
30            xatol: S::from_f64(1e-8),
31            initial_simplex_scale: S::from_f64(0.05),
32            adaptive: true,
33            verbose: false,
34        }
35    }
36}
37
38#[allow(clippy::needless_range_loop)]
39/// Nelder-Mead simplex method for derivative-free optimization.
40///
41/// Minimizes `f(x)` starting from `x0`. Uses the standard simplex operations:
42/// reflection, expansion, contraction, and shrink.
43///
44/// # Arguments
45///
46/// * `f` - Objective function.
47/// * `x0` - Initial guess.
48/// * `opts` - Algorithm options.
49pub fn nelder_mead<S, F>(
50    f: F,
51    x0: &[S],
52    opts: &NelderMeadOptions<S>,
53) -> Result<OptimResult<S>, OptimError>
54where
55    S: Scalar,
56    F: Fn(&[S]) -> S,
57{
58    let start = std::time::Instant::now();
59    let n = x0.len();
60    if n == 0 {
61        return Err(OptimError::DimensionMismatch {
62            expected: 1,
63            actual: 0,
64        });
65    }
66
67    // Adaptive parameters (Gao & Han, 2012) for high-dimensional problems
68    let (alpha, gamma, rho, sigma) = if opts.adaptive {
69        let nf = n as f64;
70        (
71            S::ONE,
72            S::ONE + S::TWO / S::from_f64(nf),
73            S::from_f64(0.75 - 0.5 / nf),
74            S::ONE - S::ONE / S::from_f64(nf),
75        )
76    } else {
77        (S::ONE, S::TWO, S::HALF, S::HALF)
78    };
79
80    // Build initial simplex: x0 and n perturbations
81    let mut simplex: Vec<Vec<S>> = Vec::with_capacity(n + 1);
82    simplex.push(x0.to_vec());
83    for i in 0..n {
84        let mut v = x0.to_vec();
85        let scale = if v[i].abs() > S::from_f64(1e-10) {
86            v[i] * opts.initial_simplex_scale
87        } else {
88            opts.initial_simplex_scale
89        };
90        v[i] += scale;
91        simplex.push(v);
92    }
93
94    let mut f_vals: Vec<S> = simplex.iter().map(|v| f(v)).collect();
95    let mut n_feval = n + 1;
96
97    let mut history: Vec<IterationRecord<S>> = Vec::new();
98    let mut iterations = 0;
99    let mut converged = false;
100
101    for iter in 0..opts.max_iter {
102        iterations = iter + 1;
103
104        // Sort simplex by function value
105        let mut indices: Vec<usize> = (0..=n).collect();
106        indices.sort_by(|&a, &b| f_vals[a].to_f64().partial_cmp(&f_vals[b].to_f64()).unwrap());
107        simplex = indices.iter().map(|&i| simplex[i].clone()).collect();
108        f_vals = indices.iter().map(|&i| f_vals[i]).collect();
109
110        let f_best = f_vals[0];
111        let f_worst = f_vals[n];
112        let f_second_worst = f_vals[n - 1];
113
114        if opts.verbose && iter % 100 == 0 {
115            eprintln!(
116                "NM iter {}: f_best = {:.6e}, f_worst = {:.6e}",
117                iter,
118                f_best.to_f64(),
119                f_worst.to_f64()
120            );
121        }
122
123        history.push(IterationRecord {
124            iteration: iter,
125            objective: f_best,
126            gradient_norm: S::ZERO,
127            step_size: (f_worst - f_best).abs(),
128            constraint_violation: S::ZERO,
129        });
130
131        // Check convergence: function values spread and simplex size
132        let f_range = (f_worst - f_best).abs();
133        let mut max_dx = S::ZERO;
134        for i in 1..=n {
135            for j in 0..n {
136                let d = (simplex[i][j] - simplex[0][j]).abs();
137                if d > max_dx {
138                    max_dx = d;
139                }
140            }
141        }
142
143        if f_range < opts.fatol && max_dx < opts.xatol {
144            converged = true;
145            break;
146        }
147
148        // Centroid of the best n vertices (excluding worst)
149        let mut centroid = vec![S::ZERO; n];
150        for i in 0..n {
151            for j in 0..n {
152                centroid[j] += simplex[i][j];
153            }
154        }
155        let n_s = S::from_usize(n);
156        for c in centroid.iter_mut() {
157            *c /= n_s;
158        }
159
160        // Reflection: x_r = centroid + alpha * (centroid - worst)
161        let x_r: Vec<S> = (0..n)
162            .map(|j| centroid[j] + alpha * (centroid[j] - simplex[n][j]))
163            .collect();
164        let f_r = f(&x_r);
165        n_feval += 1;
166
167        if f_r >= f_best && f_r < f_second_worst {
168            // Accept reflection
169            simplex[n] = x_r;
170            f_vals[n] = f_r;
171            continue;
172        }
173
174        if f_r < f_best {
175            // Try expansion: x_e = centroid + gamma * (x_r - centroid)
176            let x_e: Vec<S> = (0..n)
177                .map(|j| centroid[j] + gamma * (x_r[j] - centroid[j]))
178                .collect();
179            let f_e = f(&x_e);
180            n_feval += 1;
181
182            if f_e < f_r {
183                simplex[n] = x_e;
184                f_vals[n] = f_e;
185            } else {
186                simplex[n] = x_r;
187                f_vals[n] = f_r;
188            }
189            continue;
190        }
191
192        // f_r >= f_second_worst: contraction
193        if f_r < f_worst {
194            // Outside contraction: x_c = centroid + rho * (x_r - centroid)
195            let x_c: Vec<S> = (0..n)
196                .map(|j| centroid[j] + rho * (x_r[j] - centroid[j]))
197                .collect();
198            let f_c = f(&x_c);
199            n_feval += 1;
200
201            if f_c <= f_r {
202                simplex[n] = x_c;
203                f_vals[n] = f_c;
204                continue;
205            }
206        } else {
207            // Inside contraction: x_cc = centroid - rho * (centroid - worst)
208            let x_cc: Vec<S> = (0..n)
209                .map(|j| centroid[j] - rho * (centroid[j] - simplex[n][j]))
210                .collect();
211            let f_cc = f(&x_cc);
212            n_feval += 1;
213
214            if f_cc < f_worst {
215                simplex[n] = x_cc;
216                f_vals[n] = f_cc;
217                continue;
218            }
219        }
220
221        // Shrink: move all vertices toward the best
222        for i in 1..=n {
223            for j in 0..n {
224                simplex[i][j] = simplex[0][j] + sigma * (simplex[i][j] - simplex[0][j]);
225            }
226            f_vals[i] = f(&simplex[i]);
227            n_feval += 1;
228        }
229    }
230
231    let (status, message) = if converged {
232        (
233            OptimStatus::FunctionConverged,
234            format!("Nelder-Mead converged after {} iterations", iterations),
235        )
236    } else {
237        (
238            OptimStatus::MaxIterations,
239            format!("Nelder-Mead: max iterations ({}) reached", opts.max_iter),
240        )
241    };
242
243    Ok(OptimResult {
244        x: simplex[0].clone(),
245        f: f_vals[0],
246        grad: Vec::new(),
247        iterations,
248        n_feval,
249        n_geval: 0,
250        converged,
251        message,
252        status,
253        history,
254        lambda_eq: Vec::new(),
255        lambda_ineq: Vec::new(),
256        active_bounds: Vec::new(),
257        constraint_violation: S::ZERO,
258        wall_time_secs: 0.0,
259        pareto: None,
260        sensitivity: None,
261    }
262    .with_wall_time(start))
263}
264
265// ─── Powell's Conjugate Direction Method ─────────────────────────────────
266
267/// Options for Powell's conjugate direction method.
268#[derive(Clone, Debug)]
269pub struct PowellOptions<S: Scalar> {
270    pub max_iter: usize,
271    pub ftol: S,
272    pub xtol: S,
273    pub max_line_search_iter: usize,
274    pub verbose: bool,
275}
276
277impl<S: Scalar> Default for PowellOptions<S> {
278    fn default() -> Self {
279        Self {
280            max_iter: 10_000,
281            ftol: S::from_f64(1e-8),
282            xtol: S::from_f64(1e-8),
283            max_line_search_iter: 100,
284            verbose: false,
285        }
286    }
287}
288
289#[allow(clippy::needless_range_loop)]
290/// Powell's conjugate direction method for derivative-free optimization.
291///
292/// Builds a set of conjugate directions by performing sequential line minimizations.
293/// The direction set is updated each iteration to maintain conjugacy.
294///
295/// # Arguments
296///
297/// * `f` - Objective function.
298/// * `x0` - Initial guess.
299/// * `opts` - Algorithm options.
300pub fn powell<S, F>(f: F, x0: &[S], opts: &PowellOptions<S>) -> Result<OptimResult<S>, OptimError>
301where
302    S: Scalar,
303    F: Fn(&[S]) -> S,
304{
305    let start = std::time::Instant::now();
306    let n = x0.len();
307    if n == 0 {
308        return Err(OptimError::DimensionMismatch {
309            expected: 1,
310            actual: 0,
311        });
312    }
313
314    // Initialize direction set to identity (coordinate directions)
315    let mut directions: Vec<Vec<S>> = (0..n)
316        .map(|i| {
317            let mut d = vec![S::ZERO; n];
318            d[i] = S::ONE;
319            d
320        })
321        .collect();
322
323    let mut x = x0.to_vec();
324    let mut f_x = f(&x);
325    let mut n_feval = 1_usize;
326    let mut history: Vec<IterationRecord<S>> = Vec::new();
327    let mut converged = false;
328    let mut iterations = 0;
329
330    for iter in 0..opts.max_iter {
331        iterations = iter + 1;
332        let f_start = f_x;
333        let x_start = x.clone();
334
335        // Track which direction gave the biggest decrease
336        let mut max_decrease = S::ZERO;
337        let mut max_decrease_idx = 0;
338
339        // Line search along each direction
340        for i in 0..n {
341            let f_before = f_x;
342            let (x_new, f_new, evals) =
343                line_minimize(&f, &x, &directions[i], f_x, opts.max_line_search_iter);
344            n_feval += evals;
345            let decrease = f_before - f_new;
346            if decrease > max_decrease {
347                max_decrease = decrease;
348                max_decrease_idx = i;
349            }
350            x = x_new;
351            f_x = f_new;
352        }
353
354        if opts.verbose && iter % 50 == 0 {
355            eprintln!("Powell iter {}: f = {:.6e}", iter, f_x.to_f64());
356        }
357
358        history.push(IterationRecord {
359            iteration: iter,
360            objective: f_x,
361            gradient_norm: S::ZERO,
362            step_size: max_decrease,
363            constraint_violation: S::ZERO,
364        });
365
366        // Check convergence
367        let f_decrease = (f_start - f_x).abs();
368        let mut x_change = S::ZERO;
369        for j in 0..n {
370            let d = (x[j] - x_start[j]).abs();
371            if d > x_change {
372                x_change = d;
373            }
374        }
375
376        if f_decrease < opts.ftol && x_change < opts.xtol {
377            converged = true;
378            break;
379        }
380
381        // Update direction set: replace the direction with max decrease
382        // with the overall displacement direction
383        let new_dir: Vec<S> = (0..n).map(|j| x[j] - x_start[j]).collect();
384        let dir_norm = new_dir.iter().map(|&d| d * d).sum::<S>().sqrt();
385        if dir_norm > S::from_f64(1e-20) {
386            let normalized: Vec<S> = new_dir.iter().map(|&d| d / dir_norm).collect();
387            directions[max_decrease_idx] = normalized;
388        }
389    }
390
391    let (status, message) = if converged {
392        (
393            OptimStatus::FunctionConverged,
394            format!("Powell converged after {} iterations", iterations),
395        )
396    } else {
397        (
398            OptimStatus::MaxIterations,
399            format!("Powell: max iterations ({}) reached", opts.max_iter),
400        )
401    };
402
403    Ok(OptimResult {
404        x,
405        f: f_x,
406        grad: Vec::new(),
407        iterations,
408        n_feval,
409        n_geval: 0,
410        converged,
411        message,
412        status,
413        history,
414        lambda_eq: Vec::new(),
415        lambda_ineq: Vec::new(),
416        active_bounds: Vec::new(),
417        constraint_violation: S::ZERO,
418        wall_time_secs: 0.0,
419        pareto: None,
420        sensitivity: None,
421    }
422    .with_wall_time(start))
423}
424
425/// Brent's method for 1-D line minimization along direction `d` from `x`.
426///
427/// Uses bracket-then-Brent approach for robust convergence.
428/// Returns `(x_new, f_new, n_evaluations)`.
429fn line_minimize<S, F>(f: &F, x: &[S], d: &[S], f_x: S, max_iter: usize) -> (Vec<S>, S, usize)
430where
431    S: Scalar,
432    F: Fn(&[S]) -> S,
433{
434    let n = x.len();
435    let mut evals = 0;
436
437    // Helper: compute f at x + alpha * d
438    let f_at_alpha = |alpha: S, evals: &mut usize| -> S {
439        let xp: Vec<S> = (0..n).map(|j| x[j] + alpha * d[j]).collect();
440        *evals += 1;
441        f(&xp)
442    };
443
444    // Bracket the minimum using expansion (mnbrak-style)
445    let mut ax = S::ZERO;
446    let mut bx = S::ONE;
447    let mut fa = f_x;
448    let mut fb = f_at_alpha(bx, &mut evals);
449
450    // If f(b) > f(a), swap to ensure we go downhill
451    if fb > fa {
452        core::mem::swap(&mut ax, &mut bx);
453        core::mem::swap(&mut fa, &mut fb);
454    }
455
456    let gold = S::from_f64(1.618034); // golden ratio
457    let mut cx = bx + gold * (bx - ax);
458    let mut fc = f_at_alpha(cx, &mut evals);
459
460    // Expand bracket until we have fa >= fb <= fc (i.e., fb is in the valley)
461    let mut bracket_iters = 0;
462    while fb > fc && bracket_iters < 50 {
463        ax = bx;
464        fa = fb;
465        bx = cx;
466        fb = fc;
467        cx = bx + gold * (bx - ax);
468        fc = f_at_alpha(cx, &mut evals);
469        bracket_iters += 1;
470    }
471
472    // Ensure a < c
473    if ax > cx {
474        core::mem::swap(&mut ax, &mut cx);
475        core::mem::swap(&mut fa, &mut fc);
476    }
477
478    // Brent's method on [ax, cx] with bx as initial best
479    let tol = S::from_f64(1e-10);
480    let cgold = S::from_f64(0.381966011250105);
481    let mut a_br = ax;
482    let mut b_br = cx;
483    let mut w = bx; // second best
484    let mut v = bx; // previous w
485    let mut xmin = bx;
486    let mut fw = fb;
487    let mut fv = fb;
488    let mut fx_br = fb;
489    let mut e_br = S::ZERO; // distance moved two steps ago
490    let mut delta = S::ZERO;
491
492    for _ in 0..max_iter {
493        let midpoint = S::HALF * (a_br + b_br);
494        let tol1 = tol * xmin.abs() + S::from_f64(1e-20);
495        let tol2 = S::TWO * tol1;
496
497        if (xmin - midpoint).abs() <= tol2 - S::HALF * (b_br - a_br) {
498            break;
499        }
500
501        // Try parabolic interpolation
502        let mut use_golden = true;
503        if e_br.abs() > tol1 {
504            // Parabolic step
505            let r = (xmin - w) * (fx_br - fv);
506            let q = (xmin - v) * (fx_br - fw);
507            let p = (xmin - v) * q - (xmin - w) * r;
508            let q = S::TWO * (q - r);
509            let (p, q) = if q > S::ZERO { (-p, q) } else { (p, -q) };
510            let e_old = e_br;
511
512            if p.abs() < (S::HALF * q * e_old).abs()
513                && p > q * (a_br - xmin)
514                && p < q * (b_br - xmin)
515            {
516                // Parabolic step
517                delta = p / q;
518                let u = xmin + delta;
519                if (u - a_br) < tol2 || (b_br - u) < tol2 {
520                    delta = if xmin < midpoint { tol1 } else { -tol1 };
521                }
522                use_golden = false;
523                e_br = delta;
524            }
525        }
526
527        if use_golden {
528            // Golden section step
529            e_br = if xmin >= midpoint {
530                a_br - xmin
531            } else {
532                b_br - xmin
533            };
534            delta = cgold * e_br;
535        }
536
537        // Evaluate at new point
538        let u = if delta.abs() >= tol1 {
539            xmin + delta
540        } else {
541            xmin + if delta > S::ZERO { tol1 } else { -tol1 }
542        };
543        let fu = f_at_alpha(u, &mut evals);
544
545        if fu <= fx_br {
546            if u >= xmin {
547                a_br = xmin;
548            } else {
549                b_br = xmin;
550            }
551            v = w;
552            fv = fw;
553            w = xmin;
554            fw = fx_br;
555            xmin = u;
556            fx_br = fu;
557        } else {
558            if u < xmin {
559                a_br = u;
560            } else {
561                b_br = u;
562            }
563            if fu <= fw || (w - xmin).abs() < S::from_f64(1e-20) {
564                v = w;
565                fv = fw;
566                w = u;
567                fw = fu;
568            } else if fu <= fv
569                || (v - xmin).abs() < S::from_f64(1e-20)
570                || (v - w).abs() < S::from_f64(1e-20)
571            {
572                v = u;
573                fv = fu;
574            }
575        }
576    }
577
578    // Compare with original point
579    if f_x <= fx_br {
580        (x.to_vec(), f_x, evals)
581    } else {
582        let x_best: Vec<S> = (0..n).map(|j| x[j] + xmin * d[j]).collect();
583        (x_best, fx_br, evals)
584    }
585}
586
587#[cfg(test)]
588mod tests {
589    use super::*;
590
591    // ─── Nelder-Mead tests ───
592
593    #[test]
594    fn test_nelder_mead_quadratic() {
595        // min x0^2 + x1^2, optimal at (0, 0)
596        let result = nelder_mead(
597            |x: &[f64]| x[0] * x[0] + x[1] * x[1],
598            &[5.0, 3.0],
599            &NelderMeadOptions::default(),
600        )
601        .unwrap();
602        assert!(result.converged, "did not converge: {}", result.message);
603        assert!(result.x[0].abs() < 1e-4, "x0={}", result.x[0]);
604        assert!(result.x[1].abs() < 1e-4, "x1={}", result.x[1]);
605        assert!(result.f < 1e-8, "f={}", result.f);
606    }
607
608    #[test]
609    fn test_nelder_mead_rosenbrock() {
610        // Rosenbrock: min (1-x)^2 + 100*(y - x^2)^2
611        let result = nelder_mead(
612            |x: &[f64]| (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2),
613            &[-1.0, 1.0],
614            &NelderMeadOptions {
615                max_iter: 50_000,
616                ..Default::default()
617            },
618        )
619        .unwrap();
620        assert!(result.f < 1e-4, "f={}, x={:?}", result.f, result.x);
621        assert!(
622            (result.x[0] - 1.0).abs() < 0.05,
623            "x0={}, expected ~1.0",
624            result.x[0]
625        );
626    }
627
628    #[test]
629    fn test_nelder_mead_1d() {
630        let result = nelder_mead(
631            |x: &[f64]| (x[0] - 3.0).powi(2),
632            &[10.0],
633            &NelderMeadOptions::default(),
634        )
635        .unwrap();
636        assert!(result.converged);
637        assert!((result.x[0] - 3.0).abs() < 1e-4, "x={}", result.x[0]);
638    }
639
640    #[test]
641    fn test_nelder_mead_beale() {
642        // Beale function, min at (3, 0.5)
643        let result = nelder_mead(
644            |x: &[f64]| {
645                (1.5 - x[0] + x[0] * x[1]).powi(2)
646                    + (2.25 - x[0] + x[0] * x[1] * x[1]).powi(2)
647                    + (2.625 - x[0] + x[0] * x[1].powi(3)).powi(2)
648            },
649            &[1.0, 1.0],
650            &NelderMeadOptions {
651                max_iter: 50_000,
652                ..Default::default()
653            },
654        )
655        .unwrap();
656        assert!(result.f < 1e-4, "f={}", result.f);
657    }
658
659    // ─── Powell tests ───
660
661    #[test]
662    fn test_powell_quadratic() {
663        let result = powell(
664            |x: &[f64]| x[0] * x[0] + x[1] * x[1],
665            &[5.0, 3.0],
666            &PowellOptions::default(),
667        )
668        .unwrap();
669        assert!(result.converged, "did not converge: {}", result.message);
670        assert!(result.x[0].abs() < 1e-4, "x0={}", result.x[0]);
671        assert!(result.x[1].abs() < 1e-4, "x1={}", result.x[1]);
672    }
673
674    #[test]
675    fn test_powell_quadratic_offset() {
676        // min (x-2)^2 + (y-3)^2
677        let result = powell(
678            |x: &[f64]| (x[0] - 2.0).powi(2) + (x[1] - 3.0).powi(2),
679            &[0.0, 0.0],
680            &PowellOptions::default(),
681        )
682        .unwrap();
683        assert!(result.converged);
684        assert!((result.x[0] - 2.0).abs() < 1e-4, "x0={}", result.x[0]);
685        assert!((result.x[1] - 3.0).abs() < 1e-4, "x1={}", result.x[1]);
686    }
687
688    #[test]
689    fn test_powell_rosenbrock() {
690        let result = powell(
691            |x: &[f64]| (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2),
692            &[-1.0, 1.0],
693            &PowellOptions {
694                max_iter: 50_000,
695                ..Default::default()
696            },
697        )
698        .unwrap();
699        assert!(result.f < 0.01, "f={}", result.f);
700    }
701
702    #[test]
703    fn test_powell_1d() {
704        let result = powell(
705            |x: &[f64]| (x[0] - 7.0).powi(2),
706            &[0.0],
707            &PowellOptions::default(),
708        )
709        .unwrap();
710        assert!(result.converged);
711        assert!((result.x[0] - 7.0).abs() < 1e-4, "x={}", result.x[0]);
712    }
713}
numra_optim/derivative_free.rs

numra_optim/
derivative_free.rs