scirs2_optimize/robust/
minimax.rs

1//! Minimax Optimization Methods for Robust Optimization
2//!
3//! This module implements minimax (min-max) optimization algorithms that minimize
4//! the maximum of a collection of functions:
5//!
6//! ```text
7//! min_x  max_{i ∈ [k]}  f_i(x)
8//! ```
9//!
10//! Such problems arise in robust optimization (worst-case performance),
11//! Chebyshev approximation, and multi-objective optimization.
12//!
13//! # Algorithms
14//!
15//! - [`minimax_subgradient`]: Subgradient descent for non-smooth minimax
16//! - [`minimax_bundle`]: Bundle method for accurate non-smooth minimax
17//! - [`smooth_minimax`]: Nesterov log-sum-exp smoothing technique
18//! - [`game_theoretic_minimax`]: Zero-sum game Nash equilibrium via fictitious play
19//!
20//! # References
21//!
22//! - Shor, N.Z. (1985). *Minimization Methods for Non-Differentiable Functions*.
23//! - Nesterov, Y. (2005). "Smooth minimization of non-smooth functions". *Mathematical Programming*.
24//! - Brown, G.W. (1951). "Iterative solution of games by fictitious play". *Activity Analysis of Production and Allocation*.
25//! - Lemaréchal, C. (1975). "An extension of Davidon methods to nondifferentiable problems". *Mathematical Programming Study*.
26
27use crate::error::{OptimizeError, OptimizeResult};
28use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
29
30// ─── Data types ───────────────────────────────────────────────────────────────
31
32/// A minimax problem: minimize the maximum over k functions.
33///
34/// Represents:
35/// ```text
36/// min_x  max_{i=0..k-1}  f_i(x)
37/// ```
38///
39/// # Example
40///
41/// ```rust
42/// use scirs2_optimize::robust::minimax::MinimaxProblem;
43/// use scirs2_core::ndarray::ArrayView1;
44///
45/// // max(f0, f1) where f0(x)=x[0]+1, f1(x)=-x[0]+2
46/// let problem = MinimaxProblem::new(vec![
47///     Box::new(|x: &ArrayView1<f64>| x[0] + 1.0),
48///     Box::new(|x: &ArrayView1<f64>| -x[0] + 2.0),
49/// ]);
50/// ```
51pub struct MinimaxProblem {
52    /// The functions f_i(x) being maximized.
53    pub funcs: Vec<Box<dyn Fn(&ArrayView1<f64>) -> f64 + Send + Sync>>,
54}
55
56impl MinimaxProblem {
57    /// Create a new minimax problem from a collection of functions.
58    pub fn new(funcs: Vec<Box<dyn Fn(&ArrayView1<f64>) -> f64 + Send + Sync>>) -> Self {
59        Self { funcs }
60    }
61
62    /// Evaluate max_i f_i(x).
63    pub fn eval_max(&self, x: &ArrayView1<f64>) -> f64 {
64        self.funcs
65            .iter()
66            .map(|f| f(x))
67            .fold(f64::NEG_INFINITY, f64::max)
68    }
69
70    /// Evaluate all function values f_i(x).
71    pub fn eval_all(&self, x: &ArrayView1<f64>) -> Vec<f64> {
72        self.funcs.iter().map(|f| f(x)).collect()
73    }
74
75    /// Return the index of the maximizing function at x.
76    pub fn argmax(&self, x: &ArrayView1<f64>) -> usize {
77        let vals = self.eval_all(x);
78        vals.iter()
79            .enumerate()
80            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
81            .map(|(i, _)| i)
82            .unwrap_or(0)
83    }
84
85    /// Return the number of component functions.
86    pub fn num_funcs(&self) -> usize {
87        self.funcs.len()
88    }
89}
90
91/// Configuration for minimax solvers.
92#[derive(Debug, Clone)]
93pub struct MinimaxSolverConfig {
94    /// Maximum number of outer iterations.
95    pub max_iter: usize,
96    /// Convergence tolerance.
97    pub tol: f64,
98    /// Initial step size.
99    pub step_size: f64,
100    /// Step size decay factor (multiplied each iteration for diminishing step sizes).
101    pub step_decay: f64,
102    /// Finite-difference step for gradient approximation.
103    pub fd_step: f64,
104    /// Smoothing parameter μ for log-sum-exp (used by smooth_minimax).
105    pub smoothing_mu: f64,
106    /// Number of fictitious play iterations (used by game_theoretic_minimax).
107    pub fictitious_play_iter: usize,
108}
109
110impl Default for MinimaxSolverConfig {
111    fn default() -> Self {
112        Self {
113            max_iter: 2_000,
114            tol: 1e-6,
115            step_size: 1e-2,
116            step_decay: 1.0, // constant step size by default
117            fd_step: 1e-5,
118            smoothing_mu: 0.1,
119            fictitious_play_iter: 1_000,
120        }
121    }
122}
123
124/// Result of a minimax solve.
125#[derive(Debug, Clone)]
126pub struct MinimaxSolveResult {
127    /// Approximate minimizer x*.
128    pub x: Array1<f64>,
129    /// Minimax value max_i f_i(x*).
130    pub fun: f64,
131    /// Index of the active (maximizing) constraint at convergence.
132    pub active_index: usize,
133    /// Number of outer iterations performed.
134    pub n_iter: usize,
135    /// Whether the algorithm converged.
136    pub converged: bool,
137    /// Status message.
138    pub message: String,
139}
140
141// ─── Internal helpers ─────────────────────────────────────────────────────────
142
143/// Forward finite-difference gradient of a scalar function.
144fn fd_gradient<F>(f: &F, x: &ArrayView1<f64>, h: f64) -> Array1<f64>
145where
146    F: Fn(&ArrayView1<f64>) -> f64,
147{
148    let n = x.len();
149    let f0 = f(x);
150    let mut g = Array1::<f64>::zeros(n);
151    let mut x_fwd = x.to_owned();
152    for i in 0..n {
153        x_fwd[i] += h;
154        g[i] = (f(&x_fwd.view()) - f0) / h;
155        x_fwd[i] = x[i];
156    }
157    g
158}
159
160/// L2 norm of a vector.
161#[inline]
162fn l2_norm(v: &Array1<f64>) -> f64 {
163    v.iter().map(|vi| vi * vi).sum::<f64>().sqrt()
164}
165
166// ─── Subgradient Descent ──────────────────────────────────────────────────────
167
168/// Minimize max_i f_i(x) using subgradient descent.
169///
170/// At each step, the method selects a subgradient from the active function
171/// (the one achieving the maximum) and takes a descent step.
172///
173/// The step size schedule is:
174/// - constant: αₖ = α₀                    (step_decay = 1.0)
175/// - diminishing: αₖ = α₀ / √k            (step_decay < 1.0, set manually)
176///
177/// # Arguments
178///
179/// * `problem` – the minimax problem
180/// * `x0`      – initial point (n-vector)
181/// * `config`  – solver configuration
182///
183/// # Returns
184///
185/// [`MinimaxSolveResult`] with the approximate minimizer.
186///
187/// # References
188///
189/// Shor (1985), *Minimization Methods for Non-Differentiable Functions*.
190pub fn minimax_subgradient(
191    problem: &MinimaxProblem,
192    x0: &ArrayView1<f64>,
193    config: &MinimaxSolverConfig,
194) -> OptimizeResult<MinimaxSolveResult> {
195    if problem.num_funcs() == 0 {
196        return Err(OptimizeError::ValueError(
197            "MinimaxProblem must contain at least one function".to_string(),
198        ));
199    }
200    let n = x0.len();
201    if n == 0 {
202        return Err(OptimizeError::ValueError(
203            "x0 must be non-empty".to_string(),
204        ));
205    }
206
207    let mut x = x0.to_owned();
208    // Track best point (lowest minimax value seen so far)
209    let mut x_best = x.clone();
210    let mut val_best = problem.eval_max(&x.view());
211    let h = config.fd_step;
212    let mut converged = false;
213
214    for k in 0..config.max_iter {
215        let vals = problem.eval_all(&x.view());
216        let max_val = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
217
218        // Select index achieving the maximum (subgradient belongs to the active function)
219        let active_idx = vals
220            .iter()
221            .enumerate()
222            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
223            .map(|(i, _)| i)
224            .unwrap_or(0);
225
226        if max_val < val_best {
227            val_best = max_val;
228            x_best = x.clone();
229        }
230
231        // Compute subgradient of max_i f_i at x: a subgradient of f_{active_idx}
232        let subgrad = fd_gradient(
233            &|v: &ArrayView1<f64>| problem.funcs[active_idx](v),
234            &x.view(),
235            h,
236        );
237        let sg_norm = l2_norm(&subgrad);
238
239        if sg_norm < config.tol {
240            converged = true;
241            break;
242        }
243
244        // Diminishing step size: α_k = α₀ / √(k+1)
245        let alpha = config.step_size / ((k as f64 + 1.0).sqrt());
246
247        // Subgradient descent step
248        for i in 0..n {
249            x[i] -= alpha * subgrad[i];
250        }
251    }
252
253    let active_idx = problem.argmax(&x_best.view());
254    Ok(MinimaxSolveResult {
255        fun: val_best,
256        active_index: active_idx,
257        n_iter: config.max_iter,
258        converged,
259        message: if converged {
260            "Subgradient descent converged".to_string()
261        } else {
262            "Subgradient descent reached maximum iterations".to_string()
263        },
264        x: x_best,
265    })
266}
267
268// ─── Bundle Method ────────────────────────────────────────────────────────────
269
270/// A cutting plane (bundle element): linear lower model for f around a point.
271#[derive(Debug, Clone)]
272struct BundleCut {
273    /// Reference point where the cut was generated.
274    point: Array1<f64>,
275    /// Function value at the reference point.
276    value: f64,
277    /// Subgradient at the reference point.
278    subgrad: Array1<f64>,
279}
280
281/// Minimize max_i f_i(x) using a simplified bundle method.
282///
283/// The bundle method maintains a *model* of the objective using past subgradients
284/// (cutting planes). At each step, a proximal quadratic subproblem is solved:
285///
286/// ```text
287/// min_{d}  max_j { f_j(y_j) + gⱼᵀ (x + d - yⱼ) } + (1/2t) ‖d‖²
288/// ```
289///
290/// where t is a proximity parameter. This gives better descent than plain
291/// subgradient methods.
292///
293/// # Arguments
294///
295/// * `problem` – the minimax problem
296/// * `x0`      – initial point
297/// * `config`  – solver configuration
298///
299/// # Returns
300///
301/// [`MinimaxSolveResult`] with the approximate minimizer.
302///
303/// # References
304///
305/// Lemaréchal (1975); Kiwiel (1990) *Methods of Descent for Nondifferentiable Optimization*.
306pub fn minimax_bundle(
307    problem: &MinimaxProblem,
308    x0: &ArrayView1<f64>,
309    config: &MinimaxSolverConfig,
310) -> OptimizeResult<MinimaxSolveResult> {
311    if problem.num_funcs() == 0 {
312        return Err(OptimizeError::ValueError(
313            "MinimaxProblem must contain at least one function".to_string(),
314        ));
315    }
316    let n = x0.len();
317    if n == 0 {
318        return Err(OptimizeError::ValueError(
319            "x0 must be non-empty".to_string(),
320        ));
321    }
322
323    let h = config.fd_step;
324    let max_bundle_size = 20_usize;
325    // Proximity parameter controls trade-off between model accuracy and step length
326    let prox_t = config.step_size;
327
328    let mut x = x0.to_owned();
329    let mut x_best = x.clone();
330    let mut val_best = problem.eval_max(&x.view());
331    let mut bundle: Vec<BundleCut> = Vec::with_capacity(max_bundle_size);
332    let mut converged = false;
333
334    // Helper: evaluate the bundle model at x + d
335    // model(d) = max_{j} { v_j + g_j^T (x + d - y_j) }
336    let eval_model = |x_cur: &Array1<f64>, d: &Array1<f64>, cuts: &[BundleCut]| -> f64 {
337        cuts.iter()
338            .map(|cut| {
339                let diff: f64 = x_cur
340                    .iter()
341                    .zip(d.iter())
342                    .zip(cut.point.iter())
343                    .map(|((&xc, &dc), &yj)| xc + dc - yj)
344                    .zip(cut.subgrad.iter())
345                    .map(|(delta, &gj)| delta * gj)
346                    .sum::<f64>();
347                cut.value + diff
348            })
349            .fold(f64::NEG_INFINITY, f64::max)
350    };
351
352    for k in 0..config.max_iter {
353        let vals = problem.eval_all(&x.view());
354        let max_val = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
355
356        if max_val < val_best {
357            val_best = max_val;
358            x_best = x.clone();
359        }
360
361        // Generate a new bundle cut from each function achieving near-maximum
362        let threshold = max_val - config.tol;
363        let new_cuts: Vec<BundleCut> = vals
364            .iter()
365            .enumerate()
366            .filter(|(_, &v)| v >= threshold)
367            .map(|(i, _)| {
368                let subgrad = fd_gradient(&|v: &ArrayView1<f64>| problem.funcs[i](v), &x.view(), h);
369                BundleCut {
370                    point: x.clone(),
371                    value: vals[i],
372                    subgrad,
373                }
374            })
375            .collect();
376
377        bundle.extend(new_cuts);
378
379        // Trim bundle to max size (keep most recent cuts)
380        if bundle.len() > max_bundle_size {
381            let start = bundle.len() - max_bundle_size;
382            bundle.drain(..start);
383        }
384
385        if bundle.is_empty() {
386            break;
387        }
388
389        // Solve the proximal bundle subproblem via projected gradient on d:
390        //   min_{d}  model(x, d, bundle) + (1/2t) ‖d‖²
391        // We use a simple gradient method on d (inner loop).
392        let mut d = Array1::<f64>::zeros(n);
393        let inner_steps = 100_usize;
394        let inner_step = prox_t * 0.5;
395
396        for _ in 0..inner_steps {
397            // Gradient of bundle model at current d (subgradient of max over cuts)
398            let active_cut = bundle
399                .iter()
400                .enumerate()
401                .max_by(|(_, a), (_, b)| {
402                    let va = a.value
403                        + a.point
404                            .iter()
405                            .zip(d.iter())
406                            .zip(x.iter())
407                            .map(|((&yj, &dj), &xc)| {
408                                a.subgrad[{
409                                    // inline the index via manual accumulation
410                                    0
411                                }] * (xc + dj - yj)
412                            })
413                            .sum::<f64>();
414                    let vb = b.value
415                        + b.point
416                            .iter()
417                            .zip(d.iter())
418                            .zip(x.iter())
419                            .map(|((&yj, &dj), &xc)| b.subgrad[0] * (xc + dj - yj))
420                            .sum::<f64>();
421                    va.partial_cmp(&vb).unwrap_or(std::cmp::Ordering::Equal)
422                })
423                .map(|(i, _)| i)
424                .unwrap_or(0);
425
426            // Proper active cut selection: re-evaluate scalar model for each cut
427            let active_idx = bundle
428                .iter()
429                .enumerate()
430                .max_by(|(_, ca), (_, cb)| {
431                    let va: f64 = ca.value
432                        + ca.subgrad
433                            .iter()
434                            .zip(x.iter().zip(d.iter()).zip(ca.point.iter()))
435                            .map(|(&gj, ((&xc, &dc), &yj))| gj * (xc + dc - yj))
436                            .sum::<f64>();
437                    let vb: f64 = cb.value
438                        + cb.subgrad
439                            .iter()
440                            .zip(x.iter().zip(d.iter()).zip(cb.point.iter()))
441                            .map(|(&gj, ((&xc, &dc), &yj))| gj * (xc + dc - yj))
442                            .sum::<f64>();
443                    va.partial_cmp(&vb).unwrap_or(std::cmp::Ordering::Equal)
444                })
445                .map(|(i, _)| i)
446                .unwrap_or(active_cut);
447
448            let cut = &bundle[active_idx];
449            // Gradient of model + proximal term w.r.t. d:
450            //   g_model(d) = g_active  (subgradient of max)
451            //   g_prox(d)  = d / t
452            let grad_d: Array1<f64> = cut
453                .subgrad
454                .iter()
455                .zip(d.iter())
456                .map(|(&gj, &dj)| gj + dj / prox_t)
457                .collect();
458
459            let step_norm = l2_norm(&grad_d);
460            if step_norm < config.tol * 0.1 {
461                break;
462            }
463            for i in 0..n {
464                d[i] -= inner_step * grad_d[i];
465            }
466        }
467
468        // Check descent: only accept step if model predicts sufficient decrease
469        let d_norm = l2_norm(&d);
470        if d_norm < config.tol {
471            converged = true;
472            break;
473        }
474
475        // Serious step: update x
476        for i in 0..n {
477            x[i] += d[i];
478        }
479
480        // Null step convergence check: if max value not decreasing
481        let new_max = problem.eval_max(&x.view());
482        if (new_max - max_val).abs() < config.tol && k > 10 {
483            converged = true;
484            break;
485        }
486    }
487
488    let active_idx = problem.argmax(&x_best.view());
489    Ok(MinimaxSolveResult {
490        x: x_best,
491        fun: val_best,
492        active_index: active_idx,
493        n_iter: config.max_iter,
494        converged,
495        message: if converged {
496            "Bundle method converged".to_string()
497        } else {
498            "Bundle method reached maximum iterations".to_string()
499        },
500    })
501}
502
503// ─── Nesterov Smooth Minimax ──────────────────────────────────────────────────
504
505/// Minimize max_i f_i(x) via Nesterov's log-sum-exp smoothing.
506///
507/// The non-smooth objective  F(x) = max_i f_i(x)  is replaced by the smooth
508/// surrogate:
509///
510/// ```text
511/// F_μ(x) = (μ/k) · log(Σ_i exp(f_i(x) / μ))  →  F(x)  as  μ → 0
512/// ```
513///
514/// The error satisfies F(x) ≤ F_μ(x) ≤ F(x) + μ·log(k), so for small μ
515/// F_μ is an accurate smooth upper bound. Gradient descent on F_μ converges
516/// at rate O(1/k²) (Nesterov accelerated gradient).
517///
518/// # Arguments
519///
520/// * `problem` – the minimax problem with k functions
521/// * `x0`      – initial point
522/// * `config`  – solver configuration (uses `smoothing_mu` as μ)
523///
524/// # Returns
525///
526/// [`MinimaxSolveResult`] with the approximate minimizer.
527///
528/// # References
529///
530/// Nesterov, Y. (2005). "Smooth minimization of non-smooth functions".
531/// *Mathematical Programming*, 103(1), 127–152.
532pub fn smooth_minimax(
533    problem: &MinimaxProblem,
534    x0: &ArrayView1<f64>,
535    config: &MinimaxSolverConfig,
536) -> OptimizeResult<MinimaxSolveResult> {
537    if problem.num_funcs() == 0 {
538        return Err(OptimizeError::ValueError(
539            "MinimaxProblem must contain at least one function".to_string(),
540        ));
541    }
542    let n = x0.len();
543    if n == 0 {
544        return Err(OptimizeError::ValueError(
545            "x0 must be non-empty".to_string(),
546        ));
547    }
548    let mu = config.smoothing_mu;
549    if mu <= 0.0 {
550        return Err(OptimizeError::ValueError(format!(
551            "smoothing_mu must be positive, got {}",
552            mu
553        )));
554    }
555
556    let k = problem.num_funcs() as f64;
557    let h = config.fd_step;
558
559    // Smooth surrogate: F_μ(x) = μ * log( (1/k) Σ_i exp(f_i(x) / μ) )
560    // This is the "soft-max" approximation.
561    let smooth_obj = |x: &ArrayView1<f64>| -> f64 {
562        let vals = problem.eval_all(x);
563        // Use numerically stable log-sum-exp
564        let max_val = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
565        if max_val.is_infinite() {
566            return max_val;
567        }
568        let sum_exp: f64 = vals.iter().map(|&v| ((v - max_val) / mu).exp()).sum();
569        mu * (sum_exp.ln() + (max_val / mu)) - mu * k.ln()
570    };
571
572    let mut x = x0.to_owned();
573    let mut x_best = x.clone();
574    let mut val_best = problem.eval_max(&x.view());
575    let mut converged = false;
576
577    // Nesterov accelerated gradient (AGD) / FISTA-style
578    let mut y = x.clone(); // extrapolated point
579    let mut t_k = 1.0_f64; // momentum coefficient
580
581    for _ in 0..config.max_iter {
582        // Gradient of smooth objective at extrapolated point y
583        let grad = fd_gradient(&smooth_obj, &y.view(), h);
584        let grad_norm = l2_norm(&grad);
585
586        if grad_norm < config.tol {
587            converged = true;
588            break;
589        }
590
591        // Gradient descent step on y
592        let x_new: Array1<f64> = y
593            .iter()
594            .zip(grad.iter())
595            .map(|(&yi, &gi)| yi - config.step_size * gi)
596            .collect();
597
598        // Nesterov momentum update
599        let t_new = (1.0 + (1.0 + 4.0 * t_k * t_k).sqrt()) / 2.0;
600        let mom = (t_k - 1.0) / t_new;
601
602        let y_new: Array1<f64> = x_new
603            .iter()
604            .zip(x.iter())
605            .map(|(&xn, &xo)| xn + mom * (xn - xo))
606            .collect();
607
608        // Evaluate true minimax at new x
609        let max_val = problem.eval_max(&x_new.view());
610        if max_val < val_best {
611            val_best = max_val;
612            x_best = x_new.clone();
613        }
614
615        x = x_new;
616        y = y_new;
617        t_k = t_new;
618    }
619
620    let active_idx = problem.argmax(&x_best.view());
621    Ok(MinimaxSolveResult {
622        x: x_best,
623        fun: val_best,
624        active_index: active_idx,
625        n_iter: config.max_iter,
626        converged,
627        message: if converged {
628            "Smooth minimax (Nesterov) converged".to_string()
629        } else {
630            "Smooth minimax reached maximum iterations".to_string()
631        },
632    })
633}
634
635// ─── Game-Theoretic Minimax (Fictitious Play) ─────────────────────────────────
636
637/// Result of a game-theoretic minimax solve.
638#[derive(Debug, Clone)]
639pub struct GameMinimaxResult {
640    /// Approximate minimizer strategy (mixed or pure).
641    pub x: Array1<f64>,
642    /// Minimax value.
643    pub fun: f64,
644    /// Mixed strategy (probability distribution over pure strategies for the maximizer).
645    /// Entry i is the empirical frequency of function i being active.
646    pub maximizer_strategy: Array1<f64>,
647    /// Number of iterations performed.
648    pub n_iter: usize,
649    /// Whether the algorithm converged.
650    pub converged: bool,
651    /// Status message.
652    pub message: String,
653}
654
655/// Solve a zero-sum game minimax via fictitious play.
656///
657/// Fictitious play (Brown 1951) is a classical algorithm for zero-sum games:
658/// each player responds optimally to the *empirical distribution* of the
659/// opponent's past strategies.
660///
661/// For the continuous minimax problem, we discretize the minimizer's action
662/// space around x₀ (using a simplex of perturbations) and apply:
663///
664/// 1. Maximizer picks the function i* = argmax_i Σ_j q_j f_i(x_j)
665///    (best response to current minimizer mixture).
666/// 2. Minimizer picks x* = argmin_x f_{i*}(x) + (sum of past active gradients).
667///    (best response to current maximizer mixture q).
668/// 3. Update empirical frequencies.
669///
670/// **Note**: This implementation uses a gradient-based inner response for the
671/// minimizer (projected gradient descent on the currently active function),
672/// which gives a tractable continuous analogue of fictitious play.
673///
674/// # Arguments
675///
676/// * `problem`       – the minimax problem
677/// * `x0`            – initial minimizer point
678/// * `step_size`     – step size for the minimizer's gradient response
679/// * `config`        – solver configuration (uses `fictitious_play_iter`)
680///
681/// # Returns
682///
683/// [`GameMinimaxResult`] with approximate Nash equilibrium.
684///
685/// # References
686///
687/// Brown, G.W. (1951). "Iterative solution of games by fictitious play".
688/// In *Activity Analysis of Production and Allocation*, pp. 374–376.
689pub fn game_theoretic_minimax(
690    problem: &MinimaxProblem,
691    x0: &ArrayView1<f64>,
692    step_size: f64,
693    config: &MinimaxSolverConfig,
694) -> OptimizeResult<GameMinimaxResult> {
695    if problem.num_funcs() == 0 {
696        return Err(OptimizeError::ValueError(
697            "MinimaxProblem must contain at least one function".to_string(),
698        ));
699    }
700    let n = x0.len();
701    if n == 0 {
702        return Err(OptimizeError::ValueError(
703            "x0 must be non-empty".to_string(),
704        ));
705    }
706    let k = problem.num_funcs();
707    let h = config.fd_step;
708    let max_fp_iter = config.fictitious_play_iter;
709
710    // Empirical frequency counts for each function (maximizer's mixed strategy)
711    let mut counts = vec![0_usize; k];
712    let mut x = x0.to_owned();
713    let mut x_best = x.clone();
714    let mut val_best = problem.eval_max(&x.view());
715    let mut converged = false;
716
717    // Maintain a cumulative "average gradient" to guide the minimizer
718    let mut cumulative_grad = Array1::<f64>::zeros(n);
719    let mut cumulative_count = 0_usize;
720
721    for iter in 0..max_fp_iter {
722        // ── Maximizer's best response ───────────────────────────────────────
723        // Compute the expected loss under the current minimizer iterate x
724        let vals = problem.eval_all(&x.view());
725
726        // Maximizer picks the function with highest value (best response)
727        let active_i = vals
728            .iter()
729            .enumerate()
730            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
731            .map(|(i, _)| i)
732            .unwrap_or(0);
733
734        counts[active_i] += 1;
735        cumulative_count += 1;
736
737        let max_val = vals[active_i];
738        if max_val < val_best {
739            val_best = max_val;
740            x_best = x.clone();
741        }
742
743        // ── Minimizer's best response ───────────────────────────────────────
744        // Compute gradient of the active function and accumulate
745        let grad_i = fd_gradient(
746            &|v: &ArrayView1<f64>| problem.funcs[active_i](v),
747            &x.view(),
748            h,
749        );
750        for i in 0..n {
751            cumulative_grad[i] += grad_i[i];
752        }
753
754        // Minimizer takes a step against the cumulative (average) gradient
755        let avg_grad_norm: f64 =
756            cumulative_grad.iter().map(|&g| g * g).sum::<f64>().sqrt() / cumulative_count as f64;
757
758        if avg_grad_norm < config.tol {
759            converged = true;
760            break;
761        }
762
763        // Step in direction of negative average gradient (best response update)
764        let alpha = step_size / ((iter as f64 + 1.0).sqrt());
765        for i in 0..n {
766            x[i] -= alpha * cumulative_grad[i] / cumulative_count as f64;
767        }
768
769        // Convergence: check if active function value is stable
770        if iter > 10 && (max_val - problem.eval_max(&x.view())).abs() < config.tol {
771            converged = true;
772            break;
773        }
774    }
775
776    // Compute empirical mixed strategy for the maximizer
777    let total = counts.iter().sum::<usize>().max(1) as f64;
778    let maximizer_strategy: Array1<f64> = counts.iter().map(|&c| c as f64 / total).collect();
779
780    Ok(GameMinimaxResult {
781        x: x_best,
782        fun: val_best,
783        maximizer_strategy,
784        n_iter: max_fp_iter,
785        converged,
786        message: if converged {
787            "Fictitious play converged".to_string()
788        } else {
789            "Fictitious play reached maximum iterations".to_string()
790        },
791    })
792}
793
794// ─── Convenience: evaluate smooth minimax objective ──────────────────────────
795
796/// Evaluate the Nesterov smooth upper bound of max_i f_i(x).
797///
798/// ```text
799/// F_μ(x) = μ · log( (1/k) Σ_i exp(f_i(x)/μ) )
800/// ```
801///
802/// # Arguments
803///
804/// * `funcs` – slice of function closures f_i
805/// * `x`     – evaluation point
806/// * `mu`    – smoothing parameter > 0 (smaller → tighter approximation)
807///
808/// # Returns
809///
810/// The smooth upper bound value.
811pub fn smooth_max_value<F>(funcs: &[F], x: &ArrayView1<f64>, mu: f64) -> OptimizeResult<f64>
812where
813    F: Fn(&ArrayView1<f64>) -> f64,
814{
815    if funcs.is_empty() {
816        return Err(OptimizeError::ValueError(
817            "funcs must be non-empty".to_string(),
818        ));
819    }
820    if mu <= 0.0 {
821        return Err(OptimizeError::ValueError(format!(
822            "mu must be positive, got {}",
823            mu
824        )));
825    }
826    let vals: Vec<f64> = funcs.iter().map(|f| f(x)).collect();
827    let max_val = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
828    if max_val.is_infinite() {
829        return Ok(max_val);
830    }
831    let sum_exp: f64 = vals.iter().map(|&v| ((v - max_val) / mu).exp()).sum();
832    let k = funcs.len() as f64;
833    Ok(mu * (sum_exp.ln() + (max_val / mu)) - mu * k.ln())
834}
835
836// ─── Tests ────────────────────────────────────────────────────────────────────
837
838#[cfg(test)]
839mod tests {
840    use super::*;
841    use scirs2_core::ndarray::array;
842
843    /// Build the problem max(f0, f1) where:
844    ///   f0(x) = (x[0] - 1)²   (minimum at x=1)
845    ///   f1(x) = (x[0] + 1)²   (minimum at x=-1)
846    /// max(f0, f1) is minimised at x=0 where max = 1.
847    fn build_two_func_problem() -> MinimaxProblem {
848        MinimaxProblem::new(vec![
849            Box::new(|x: &ArrayView1<f64>| (x[0] - 1.0).powi(2)),
850            Box::new(|x: &ArrayView1<f64>| (x[0] + 1.0).powi(2)),
851        ])
852    }
853
854    #[test]
855    fn test_minimax_problem_eval() {
856        let p = build_two_func_problem();
857        let x = array![0.0];
858        assert_eq!(p.eval_max(&x.view()), 1.0);
859        let x2 = array![1.0];
860        // f0(1)=0, f1(1)=4 → max=4
861        assert!((p.eval_max(&x2.view()) - 4.0).abs() < 1e-9);
862    }
863
864    #[test]
865    fn test_subgradient_basic() {
866        let p = build_two_func_problem();
867        let x0 = array![3.0];
868        let config = MinimaxSolverConfig {
869            max_iter: 3_000,
870            tol: 1e-4,
871            step_size: 0.5,
872            ..Default::default()
873        };
874        let result = minimax_subgradient(&p, &x0.view(), &config).expect("failed to create result");
875        // Optimal at x=0, minimax value = 1
876        assert!(
877            result.fun <= 1.5,
878            "subgradient minimax value {} should be ≤ 1.5",
879            result.fun
880        );
881        assert!(
882            result.x[0].abs() < 1.0,
883            "subgradient minimizer {} should be near 0",
884            result.x[0]
885        );
886    }
887
888    #[test]
889    fn test_smooth_minimax_basic() {
890        let p = build_two_func_problem();
891        let x0 = array![3.0];
892        let config = MinimaxSolverConfig {
893            max_iter: 3_000,
894            tol: 1e-5,
895            step_size: 1e-2,
896            smoothing_mu: 0.05,
897            ..Default::default()
898        };
899        let result = smooth_minimax(&p, &x0.view(), &config).expect("failed to create result");
900        assert!(
901            result.fun <= 2.0,
902            "smooth minimax value {} should be ≤ 2.0",
903            result.fun
904        );
905    }
906
907    #[test]
908    fn test_game_theoretic_minimax() {
909        let p = build_two_func_problem();
910        let x0 = array![2.0];
911        let config = MinimaxSolverConfig {
912            max_iter: 500,
913            tol: 1e-4,
914            fictitious_play_iter: 500,
915            ..Default::default()
916        };
917        let result =
918            game_theoretic_minimax(&p, &x0.view(), 0.1, &config).expect("failed to create result");
919        // Should move towards x=0
920        assert!(
921            result.x[0].abs() < 2.5,
922            "game theoretic minimizer {} should move toward 0",
923            result.x[0]
924        );
925        // Mixed strategy should be non-trivial (both functions played)
926        assert_eq!(result.maximizer_strategy.len(), 2);
927        let strat_sum: f64 = result.maximizer_strategy.iter().sum();
928        assert!((strat_sum - 1.0).abs() < 1e-9, "strategy should sum to 1");
929    }
930
931    #[test]
932    fn test_smooth_max_value() {
933        let funcs: Vec<Box<dyn Fn(&ArrayView1<f64>) -> f64>> = vec![
934            Box::new(|_x: &ArrayView1<f64>| 1.0),
935            Box::new(|_x: &ArrayView1<f64>| 2.0),
936            Box::new(|_x: &ArrayView1<f64>| 3.0),
937        ];
938        let x = array![0.0];
939        let val = smooth_max_value(&funcs, &x.view(), 0.01).expect("failed to create val");
940        // With small mu, smooth_max ≈ true max = 3.0
941        assert!((val - 3.0).abs() < 0.1, "smooth max ≈ 3.0, got {val}");
942    }
943
944    #[test]
945    fn test_bundle_method_basic() {
946        let p = build_two_func_problem();
947        let x0 = array![2.0];
948        let config = MinimaxSolverConfig {
949            max_iter: 500,
950            tol: 1e-4,
951            step_size: 0.5,
952            ..Default::default()
953        };
954        let result = minimax_bundle(&p, &x0.view(), &config).expect("failed to create result");
955        assert!(
956            result.fun <= 5.0,
957            "bundle minimax value {} should be reasonable",
958            result.fun
959        );
960    }
961
962    #[test]
963    fn test_empty_problem_error() {
964        let p = MinimaxProblem::new(vec![]);
965        let x0 = array![1.0];
966        let config = MinimaxSolverConfig::default();
967        assert!(minimax_subgradient(&p, &x0.view(), &config).is_err());
968        assert!(smooth_minimax(&p, &x0.view(), &config).is_err());
969    }
970
971    #[test]
972    fn test_empty_x0_error() {
973        let p = build_two_func_problem();
974        let x0: Array1<f64> = Array1::zeros(0);
975        let config = MinimaxSolverConfig::default();
976        assert!(minimax_subgradient(&p, &x0.view(), &config).is_err());
977    }
978}
scirs2_optimize/robust/minimax.rs

scirs2_optimize/robust/
minimax.rs