scirs2-optimize 0.4.2

Optimization module for SciRS2 (scirs2-optimize)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
//! Minimax Optimization Methods for Robust Optimization
//!
//! This module implements minimax (min-max) optimization algorithms that minimize
//! the maximum of a collection of functions:
//!
//! ```text
//! min_x  max_{i ∈ [k]}  f_i(x)
//! ```
//!
//! Such problems arise in robust optimization (worst-case performance),
//! Chebyshev approximation, and multi-objective optimization.
//!
//! # Algorithms
//!
//! - [`minimax_subgradient`]: Subgradient descent for non-smooth minimax
//! - [`minimax_bundle`]: Bundle method for accurate non-smooth minimax
//! - [`smooth_minimax`]: Nesterov log-sum-exp smoothing technique
//! - [`game_theoretic_minimax`]: Zero-sum game Nash equilibrium via fictitious play
//!
//! # References
//!
//! - Shor, N.Z. (1985). *Minimization Methods for Non-Differentiable Functions*.
//! - Nesterov, Y. (2005). "Smooth minimization of non-smooth functions". *Mathematical Programming*.
//! - Brown, G.W. (1951). "Iterative solution of games by fictitious play". *Activity Analysis of Production and Allocation*.
//! - Lemaréchal, C. (1975). "An extension of Davidon methods to nondifferentiable problems". *Mathematical Programming Study*.

use crate::error::{OptimizeError, OptimizeResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1};

// ─── Data types ───────────────────────────────────────────────────────────────

/// A minimax problem: minimize the maximum over k functions.
///
/// Represents:
/// ```text
/// min_x  max_{i=0..k-1}  f_i(x)
/// ```
///
/// # Example
///
/// ```rust
/// use scirs2_optimize::robust::minimax::MinimaxProblem;
/// use scirs2_core::ndarray::ArrayView1;
///
/// // max(f0, f1) where f0(x)=x[0]+1, f1(x)=-x[0]+2
/// let problem = MinimaxProblem::new(vec![
///     Box::new(|x: &ArrayView1<f64>| x[0] + 1.0),
///     Box::new(|x: &ArrayView1<f64>| -x[0] + 2.0),
/// ]);
/// ```
pub struct MinimaxProblem {
    /// The functions f_i(x) being maximized.
    pub funcs: Vec<Box<dyn Fn(&ArrayView1<f64>) -> f64 + Send + Sync>>,
}

impl MinimaxProblem {
    /// Create a new minimax problem from a collection of functions.
    pub fn new(funcs: Vec<Box<dyn Fn(&ArrayView1<f64>) -> f64 + Send + Sync>>) -> Self {
        Self { funcs }
    }

    /// Evaluate max_i f_i(x).
    pub fn eval_max(&self, x: &ArrayView1<f64>) -> f64 {
        self.funcs
            .iter()
            .map(|f| f(x))
            .fold(f64::NEG_INFINITY, f64::max)
    }

    /// Evaluate all function values f_i(x).
    pub fn eval_all(&self, x: &ArrayView1<f64>) -> Vec<f64> {
        self.funcs.iter().map(|f| f(x)).collect()
    }

    /// Return the index of the maximizing function at x.
    pub fn argmax(&self, x: &ArrayView1<f64>) -> usize {
        let vals = self.eval_all(x);
        vals.iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(i, _)| i)
            .unwrap_or(0)
    }

    /// Return the number of component functions.
    pub fn num_funcs(&self) -> usize {
        self.funcs.len()
    }
}

/// Configuration for minimax solvers.
#[derive(Debug, Clone)]
pub struct MinimaxSolverConfig {
    /// Maximum number of outer iterations.
    pub max_iter: usize,
    /// Convergence tolerance.
    pub tol: f64,
    /// Initial step size.
    pub step_size: f64,
    /// Step size decay factor (multiplied each iteration for diminishing step sizes).
    pub step_decay: f64,
    /// Finite-difference step for gradient approximation.
    pub fd_step: f64,
    /// Smoothing parameter μ for log-sum-exp (used by smooth_minimax).
    pub smoothing_mu: f64,
    /// Number of fictitious play iterations (used by game_theoretic_minimax).
    pub fictitious_play_iter: usize,
}

impl Default for MinimaxSolverConfig {
    fn default() -> Self {
        Self {
            max_iter: 2_000,
            tol: 1e-6,
            step_size: 1e-2,
            step_decay: 1.0, // constant step size by default
            fd_step: 1e-5,
            smoothing_mu: 0.1,
            fictitious_play_iter: 1_000,
        }
    }
}

/// Result of a minimax solve.
#[derive(Debug, Clone)]
pub struct MinimaxSolveResult {
    /// Approximate minimizer x*.
    pub x: Array1<f64>,
    /// Minimax value max_i f_i(x*).
    pub fun: f64,
    /// Index of the active (maximizing) constraint at convergence.
    pub active_index: usize,
    /// Number of outer iterations performed.
    pub n_iter: usize,
    /// Whether the algorithm converged.
    pub converged: bool,
    /// Status message.
    pub message: String,
}

// ─── Internal helpers ─────────────────────────────────────────────────────────

/// Forward finite-difference gradient of a scalar function.
fn fd_gradient<F>(f: &F, x: &ArrayView1<f64>, h: f64) -> Array1<f64>
where
    F: Fn(&ArrayView1<f64>) -> f64,
{
    let n = x.len();
    let f0 = f(x);
    let mut g = Array1::<f64>::zeros(n);
    let mut x_fwd = x.to_owned();
    for i in 0..n {
        x_fwd[i] += h;
        g[i] = (f(&x_fwd.view()) - f0) / h;
        x_fwd[i] = x[i];
    }
    g
}

/// L2 norm of a vector.
#[inline]
fn l2_norm(v: &Array1<f64>) -> f64 {
    v.iter().map(|vi| vi * vi).sum::<f64>().sqrt()
}

// ─── Subgradient Descent ──────────────────────────────────────────────────────

/// Minimize max_i f_i(x) using subgradient descent.
///
/// At each step, the method selects a subgradient from the active function
/// (the one achieving the maximum) and takes a descent step.
///
/// The step size schedule is:
/// - constant: αₖ = α₀                    (step_decay = 1.0)
/// - diminishing: αₖ = α₀ / √k            (step_decay < 1.0, set manually)
///
/// # Arguments
///
/// * `problem` – the minimax problem
/// * `x0`      – initial point (n-vector)
/// * `config`  – solver configuration
///
/// # Returns
///
/// [`MinimaxSolveResult`] with the approximate minimizer.
///
/// # References
///
/// Shor (1985), *Minimization Methods for Non-Differentiable Functions*.
pub fn minimax_subgradient(
    problem: &MinimaxProblem,
    x0: &ArrayView1<f64>,
    config: &MinimaxSolverConfig,
) -> OptimizeResult<MinimaxSolveResult> {
    if problem.num_funcs() == 0 {
        return Err(OptimizeError::ValueError(
            "MinimaxProblem must contain at least one function".to_string(),
        ));
    }
    let n = x0.len();
    if n == 0 {
        return Err(OptimizeError::ValueError(
            "x0 must be non-empty".to_string(),
        ));
    }

    let mut x = x0.to_owned();
    // Track best point (lowest minimax value seen so far)
    let mut x_best = x.clone();
    let mut val_best = problem.eval_max(&x.view());
    let h = config.fd_step;
    let mut converged = false;

    for k in 0..config.max_iter {
        let vals = problem.eval_all(&x.view());
        let max_val = vals
            .iter()
            .cloned()
            .fold(f64::NEG_INFINITY, f64::max);

        // Select index achieving the maximum (subgradient belongs to the active function)
        let active_idx = vals
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(i, _)| i)
            .unwrap_or(0);

        if max_val < val_best {
            val_best = max_val;
            x_best = x.clone();
        }

        // Compute subgradient of max_i f_i at x: a subgradient of f_{active_idx}
        let subgrad = fd_gradient(&|v: &ArrayView1<f64>| problem.funcs[active_idx](v), &x.view(), h);
        let sg_norm = l2_norm(&subgrad);

        if sg_norm < config.tol {
            converged = true;
            break;
        }

        // Diminishing step size: α_k = α₀ / √(k+1)
        let alpha = config.step_size / ((k as f64 + 1.0).sqrt());

        // Subgradient descent step
        for i in 0..n {
            x[i] -= alpha * subgrad[i];
        }
    }

    let active_idx = problem.argmax(&x_best.view());
    Ok(MinimaxSolveResult {
        fun: val_best,
        active_index: active_idx,
        n_iter: config.max_iter,
        converged,
        message: if converged {
            "Subgradient descent converged".to_string()
        } else {
            "Subgradient descent reached maximum iterations".to_string()
        },
        x: x_best,
    })
}

// ─── Bundle Method ────────────────────────────────────────────────────────────

/// A cutting plane (bundle element): linear lower model for f around a point.
#[derive(Debug, Clone)]
struct BundleCut {
    /// Reference point where the cut was generated.
    point: Array1<f64>,
    /// Function value at the reference point.
    value: f64,
    /// Subgradient at the reference point.
    subgrad: Array1<f64>,
}

/// Minimize max_i f_i(x) using a simplified bundle method.
///
/// The bundle method maintains a *model* of the objective using past subgradients
/// (cutting planes). At each step, a proximal quadratic subproblem is solved:
///
/// ```text
/// min_{d}  max_j { f_j(y_j) + gⱼᵀ (x + d - yⱼ) } + (1/2t) ‖d‖²
/// ```
///
/// where t is a proximity parameter. This gives better descent than plain
/// subgradient methods.
///
/// # Arguments
///
/// * `problem` – the minimax problem
/// * `x0`      – initial point
/// * `config`  – solver configuration
///
/// # Returns
///
/// [`MinimaxSolveResult`] with the approximate minimizer.
///
/// # References
///
/// Lemaréchal (1975); Kiwiel (1990) *Methods of Descent for Nondifferentiable Optimization*.
pub fn minimax_bundle(
    problem: &MinimaxProblem,
    x0: &ArrayView1<f64>,
    config: &MinimaxSolverConfig,
) -> OptimizeResult<MinimaxSolveResult> {
    if problem.num_funcs() == 0 {
        return Err(OptimizeError::ValueError(
            "MinimaxProblem must contain at least one function".to_string(),
        ));
    }
    let n = x0.len();
    if n == 0 {
        return Err(OptimizeError::ValueError(
            "x0 must be non-empty".to_string(),
        ));
    }

    let h = config.fd_step;
    let max_bundle_size = 20_usize;
    // Proximity parameter controls trade-off between model accuracy and step length
    let prox_t = config.step_size;

    let mut x = x0.to_owned();
    let mut x_best = x.clone();
    let mut val_best = problem.eval_max(&x.view());
    let mut bundle: Vec<BundleCut> = Vec::with_capacity(max_bundle_size);
    let mut converged = false;

    // Helper: evaluate the bundle model at x + d
    // model(d) = max_{j} { v_j + g_j^T (x + d - y_j) }
    let eval_model = |x_cur: &Array1<f64>, d: &Array1<f64>, cuts: &[BundleCut]| -> f64 {
        cuts.iter()
            .map(|cut| {
                let diff: f64 = x_cur
                    .iter()
                    .zip(d.iter())
                    .zip(cut.point.iter())
                    .map(|((&xc, &dc), &yj)| xc + dc - yj)
                    .zip(cut.subgrad.iter())
                    .map(|(delta, &gj)| delta * gj)
                    .sum::<f64>();
                cut.value + diff
            })
            .fold(f64::NEG_INFINITY, f64::max)
    };

    for k in 0..config.max_iter {
        let vals = problem.eval_all(&x.view());
        let max_val = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);

        if max_val < val_best {
            val_best = max_val;
            x_best = x.clone();
        }

        // Generate a new bundle cut from each function achieving near-maximum
        let threshold = max_val - config.tol;
        let new_cuts: Vec<BundleCut> = vals
            .iter()
            .enumerate()
            .filter(|(_, &v)| v >= threshold)
            .map(|(i, _)| {
                let subgrad = fd_gradient(
                    &|v: &ArrayView1<f64>| problem.funcs[i](v),
                    &x.view(),
                    h,
                );
                BundleCut {
                    point: x.clone(),
                    value: vals[i],
                    subgrad,
                }
            })
            .collect();

        bundle.extend(new_cuts);

        // Trim bundle to max size (keep most recent cuts)
        if bundle.len() > max_bundle_size {
            let start = bundle.len() - max_bundle_size;
            bundle.drain(..start);
        }

        if bundle.is_empty() {
            break;
        }

        // Solve the proximal bundle subproblem via projected gradient on d:
        //   min_{d}  model(x, d, bundle) + (1/2t) ‖d‖²
        // We use a simple gradient method on d (inner loop).
        let mut d = Array1::<f64>::zeros(n);
        let inner_steps = 100_usize;
        let inner_step = prox_t * 0.5;

        for _ in 0..inner_steps {
            // Gradient of bundle model at current d (subgradient of max over cuts)
            let active_cut = bundle
                .iter()
                .enumerate()
                .max_by(|(_, a), (_, b)| {
                    let va = a.value
                        + a.point
                            .iter()
                            .zip(d.iter())
                            .zip(x.iter())
                            .map(|((&yj, &dj), &xc)| a.subgrad[{
                                // inline the index via manual accumulation
                                0
                            }] * (xc + dj - yj))
                            .sum::<f64>();
                    let vb = b.value
                        + b.point
                            .iter()
                            .zip(d.iter())
                            .zip(x.iter())
                            .map(|((&yj, &dj), &xc)| b.subgrad[0] * (xc + dj - yj))
                            .sum::<f64>();
                    va.partial_cmp(&vb).unwrap_or(std::cmp::Ordering::Equal)
                })
                .map(|(i, _)| i)
                .unwrap_or(0);

            // Proper active cut selection: re-evaluate scalar model for each cut
            let active_idx = bundle
                .iter()
                .enumerate()
                .max_by(|(_, ca), (_, cb)| {
                    let va: f64 = ca.value
                        + ca.subgrad
                            .iter()
                            .zip(x.iter().zip(d.iter()).zip(ca.point.iter()))
                            .map(|(&gj, ((&xc, &dc), &yj))| gj * (xc + dc - yj))
                            .sum::<f64>();
                    let vb: f64 = cb.value
                        + cb.subgrad
                            .iter()
                            .zip(x.iter().zip(d.iter()).zip(cb.point.iter()))
                            .map(|(&gj, ((&xc, &dc), &yj))| gj * (xc + dc - yj))
                            .sum::<f64>();
                    va.partial_cmp(&vb).unwrap_or(std::cmp::Ordering::Equal)
                })
                .map(|(i, _)| i)
                .unwrap_or(active_cut);

            let cut = &bundle[active_idx];
            // Gradient of model + proximal term w.r.t. d:
            //   g_model(d) = g_active  (subgradient of max)
            //   g_prox(d)  = d / t
            let grad_d: Array1<f64> = cut
                .subgrad
                .iter()
                .zip(d.iter())
                .map(|(&gj, &dj)| gj + dj / prox_t)
                .collect();

            let step_norm = l2_norm(&grad_d);
            if step_norm < config.tol * 0.1 {
                break;
            }
            for i in 0..n {
                d[i] -= inner_step * grad_d[i];
            }
        }

        // Check descent: only accept step if model predicts sufficient decrease
        let d_norm = l2_norm(&d);
        if d_norm < config.tol {
            converged = true;
            break;
        }

        // Serious step: update x
        for i in 0..n {
            x[i] += d[i];
        }

        // Null step convergence check: if max value not decreasing
        let new_max = problem.eval_max(&x.view());
        if (new_max - max_val).abs() < config.tol && k > 10 {
            converged = true;
            break;
        }
    }

    let active_idx = problem.argmax(&x_best.view());
    Ok(MinimaxSolveResult {
        x: x_best,
        fun: val_best,
        active_index: active_idx,
        n_iter: config.max_iter,
        converged,
        message: if converged {
            "Bundle method converged".to_string()
        } else {
            "Bundle method reached maximum iterations".to_string()
        },
    })
}

// ─── Nesterov Smooth Minimax ──────────────────────────────────────────────────

/// Minimize max_i f_i(x) via Nesterov's log-sum-exp smoothing.
///
/// The non-smooth objective  F(x) = max_i f_i(x)  is replaced by the smooth
/// surrogate:
///
/// ```text
/// F_μ(x) = (μ/k) · log(Σ_i exp(f_i(x) / μ))  →  F(x)  as  μ → 0
/// ```
///
/// The error satisfies F(x) ≤ F_μ(x) ≤ F(x) + μ·log(k), so for small μ
/// F_μ is an accurate smooth upper bound. Gradient descent on F_μ converges
/// at rate O(1/k²) (Nesterov accelerated gradient).
///
/// # Arguments
///
/// * `problem` – the minimax problem with k functions
/// * `x0`      – initial point
/// * `config`  – solver configuration (uses `smoothing_mu` as μ)
///
/// # Returns
///
/// [`MinimaxSolveResult`] with the approximate minimizer.
///
/// # References
///
/// Nesterov, Y. (2005). "Smooth minimization of non-smooth functions".
/// *Mathematical Programming*, 103(1), 127–152.
pub fn smooth_minimax(
    problem: &MinimaxProblem,
    x0: &ArrayView1<f64>,
    config: &MinimaxSolverConfig,
) -> OptimizeResult<MinimaxSolveResult> {
    if problem.num_funcs() == 0 {
        return Err(OptimizeError::ValueError(
            "MinimaxProblem must contain at least one function".to_string(),
        ));
    }
    let n = x0.len();
    if n == 0 {
        return Err(OptimizeError::ValueError(
            "x0 must be non-empty".to_string(),
        ));
    }
    let mu = config.smoothing_mu;
    if mu <= 0.0 {
        return Err(OptimizeError::ValueError(format!(
            "smoothing_mu must be positive, got {}",
            mu
        )));
    }

    let k = problem.num_funcs() as f64;
    let h = config.fd_step;

    // Smooth surrogate: F_μ(x) = μ * log( (1/k) Σ_i exp(f_i(x) / μ) )
    // This is the "soft-max" approximation.
    let smooth_obj = |x: &ArrayView1<f64>| -> f64 {
        let vals = problem.eval_all(x);
        // Use numerically stable log-sum-exp
        let max_val = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
        if max_val.is_infinite() {
            return max_val;
        }
        let sum_exp: f64 = vals.iter().map(|&v| ((v - max_val) / mu).exp()).sum();
        mu * (sum_exp.ln() + (max_val / mu)) - mu * k.ln()
    };

    let mut x = x0.to_owned();
    let mut x_best = x.clone();
    let mut val_best = problem.eval_max(&x.view());
    let mut converged = false;

    // Nesterov accelerated gradient (AGD) / FISTA-style
    let mut y = x.clone(); // extrapolated point
    let mut t_k = 1.0_f64; // momentum coefficient

    for _ in 0..config.max_iter {
        // Gradient of smooth objective at extrapolated point y
        let grad = fd_gradient(&smooth_obj, &y.view(), h);
        let grad_norm = l2_norm(&grad);

        if grad_norm < config.tol {
            converged = true;
            break;
        }

        // Gradient descent step on y
        let x_new: Array1<f64> = y
            .iter()
            .zip(grad.iter())
            .map(|(&yi, &gi)| yi - config.step_size * gi)
            .collect();

        // Nesterov momentum update
        let t_new = (1.0 + (1.0 + 4.0 * t_k * t_k).sqrt()) / 2.0;
        let mom = (t_k - 1.0) / t_new;

        let y_new: Array1<f64> = x_new
            .iter()
            .zip(x.iter())
            .map(|(&xn, &xo)| xn + mom * (xn - xo))
            .collect();

        // Evaluate true minimax at new x
        let max_val = problem.eval_max(&x_new.view());
        if max_val < val_best {
            val_best = max_val;
            x_best = x_new.clone();
        }

        x = x_new;
        y = y_new;
        t_k = t_new;
    }

    let active_idx = problem.argmax(&x_best.view());
    Ok(MinimaxSolveResult {
        x: x_best,
        fun: val_best,
        active_index: active_idx,
        n_iter: config.max_iter,
        converged,
        message: if converged {
            "Smooth minimax (Nesterov) converged".to_string()
        } else {
            "Smooth minimax reached maximum iterations".to_string()
        },
    })
}

// ─── Game-Theoretic Minimax (Fictitious Play) ─────────────────────────────────

/// Result of a game-theoretic minimax solve.
#[derive(Debug, Clone)]
pub struct GameMinimaxResult {
    /// Approximate minimizer strategy (mixed or pure).
    pub x: Array1<f64>,
    /// Minimax value.
    pub fun: f64,
    /// Mixed strategy (probability distribution over pure strategies for the maximizer).
    /// Entry i is the empirical frequency of function i being active.
    pub maximizer_strategy: Array1<f64>,
    /// Number of iterations performed.
    pub n_iter: usize,
    /// Whether the algorithm converged.
    pub converged: bool,
    /// Status message.
    pub message: String,
}

/// Solve a zero-sum game minimax via fictitious play.
///
/// Fictitious play (Brown 1951) is a classical algorithm for zero-sum games:
/// each player responds optimally to the *empirical distribution* of the
/// opponent's past strategies.
///
/// For the continuous minimax problem, we discretize the minimizer's action
/// space around x₀ (using a simplex of perturbations) and apply:
///
/// 1. Maximizer picks the function i* = argmax_i Σ_j q_j f_i(x_j)
///    (best response to current minimizer mixture).
/// 2. Minimizer picks x* = argmin_x f_{i*}(x) + (sum of past active gradients).
///    (best response to current maximizer mixture q).
/// 3. Update empirical frequencies.
///
/// **Note**: This implementation uses a gradient-based inner response for the
/// minimizer (projected gradient descent on the currently active function),
/// which gives a tractable continuous analogue of fictitious play.
///
/// # Arguments
///
/// * `problem`       – the minimax problem
/// * `x0`            – initial minimizer point
/// * `step_size`     – step size for the minimizer's gradient response
/// * `config`        – solver configuration (uses `fictitious_play_iter`)
///
/// # Returns
///
/// [`GameMinimaxResult`] with approximate Nash equilibrium.
///
/// # References
///
/// Brown, G.W. (1951). "Iterative solution of games by fictitious play".
/// In *Activity Analysis of Production and Allocation*, pp. 374–376.
pub fn game_theoretic_minimax(
    problem: &MinimaxProblem,
    x0: &ArrayView1<f64>,
    step_size: f64,
    config: &MinimaxSolverConfig,
) -> OptimizeResult<GameMinimaxResult> {
    if problem.num_funcs() == 0 {
        return Err(OptimizeError::ValueError(
            "MinimaxProblem must contain at least one function".to_string(),
        ));
    }
    let n = x0.len();
    if n == 0 {
        return Err(OptimizeError::ValueError(
            "x0 must be non-empty".to_string(),
        ));
    }
    let k = problem.num_funcs();
    let h = config.fd_step;
    let max_fp_iter = config.fictitious_play_iter;

    // Empirical frequency counts for each function (maximizer's mixed strategy)
    let mut counts = vec![0_usize; k];
    let mut x = x0.to_owned();
    let mut x_best = x.clone();
    let mut val_best = problem.eval_max(&x.view());
    let mut converged = false;

    // Maintain a cumulative "average gradient" to guide the minimizer
    let mut cumulative_grad = Array1::<f64>::zeros(n);
    let mut cumulative_count = 0_usize;

    for iter in 0..max_fp_iter {
        // ── Maximizer's best response ───────────────────────────────────────
        // Compute the expected loss under the current minimizer iterate x
        let vals = problem.eval_all(&x.view());

        // Maximizer picks the function with highest value (best response)
        let active_i = vals
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(i, _)| i)
            .unwrap_or(0);

        counts[active_i] += 1;
        cumulative_count += 1;

        let max_val = vals[active_i];
        if max_val < val_best {
            val_best = max_val;
            x_best = x.clone();
        }

        // ── Minimizer's best response ───────────────────────────────────────
        // Compute gradient of the active function and accumulate
        let grad_i = fd_gradient(
            &|v: &ArrayView1<f64>| problem.funcs[active_i](v),
            &x.view(),
            h,
        );
        for i in 0..n {
            cumulative_grad[i] += grad_i[i];
        }

        // Minimizer takes a step against the cumulative (average) gradient
        let avg_grad_norm: f64 = cumulative_grad
            .iter()
            .map(|&g| g * g)
            .sum::<f64>()
            .sqrt()
            / cumulative_count as f64;

        if avg_grad_norm < config.tol {
            converged = true;
            break;
        }

        // Step in direction of negative average gradient (best response update)
        let alpha = step_size / ((iter as f64 + 1.0).sqrt());
        for i in 0..n {
            x[i] -= alpha * cumulative_grad[i] / cumulative_count as f64;
        }

        // Convergence: check if active function value is stable
        if iter > 10 && (max_val - problem.eval_max(&x.view())).abs() < config.tol {
            converged = true;
            break;
        }
    }

    // Compute empirical mixed strategy for the maximizer
    let total = counts.iter().sum::<usize>().max(1) as f64;
    let maximizer_strategy: Array1<f64> = counts.iter().map(|&c| c as f64 / total).collect();

    Ok(GameMinimaxResult {
        x: x_best,
        fun: val_best,
        maximizer_strategy,
        n_iter: max_fp_iter,
        converged,
        message: if converged {
            "Fictitious play converged".to_string()
        } else {
            "Fictitious play reached maximum iterations".to_string()
        },
    })
}

// ─── Convenience: evaluate smooth minimax objective ──────────────────────────

/// Evaluate the Nesterov smooth upper bound of max_i f_i(x).
///
/// ```text
/// F_μ(x) = μ · log( (1/k) Σ_i exp(f_i(x)/μ) )
/// ```
///
/// # Arguments
///
/// * `funcs` – slice of function closures f_i
/// * `x`     – evaluation point
/// * `mu`    – smoothing parameter > 0 (smaller → tighter approximation)
///
/// # Returns
///
/// The smooth upper bound value.
pub fn smooth_max_value<F>(funcs: &[F], x: &ArrayView1<f64>, mu: f64) -> OptimizeResult<f64>
where
    F: Fn(&ArrayView1<f64>) -> f64,
{
    if funcs.is_empty() {
        return Err(OptimizeError::ValueError(
            "funcs must be non-empty".to_string(),
        ));
    }
    if mu <= 0.0 {
        return Err(OptimizeError::ValueError(format!(
            "mu must be positive, got {}",
            mu
        )));
    }
    let vals: Vec<f64> = funcs.iter().map(|f| f(x)).collect();
    let max_val = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    if max_val.is_infinite() {
        return Ok(max_val);
    }
    let sum_exp: f64 = vals.iter().map(|&v| ((v - max_val) / mu).exp()).sum();
    let k = funcs.len() as f64;
    Ok(mu * (sum_exp.ln() + (max_val / mu)) - mu * k.ln())
}

// ─── Tests ────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Build the problem max(f0, f1) where:
    ///   f0(x) = (x[0] - 1)²   (minimum at x=1)
    ///   f1(x) = (x[0] + 1)²   (minimum at x=-1)
    /// max(f0, f1) is minimised at x=0 where max = 1.
    fn build_two_func_problem() -> MinimaxProblem {
        MinimaxProblem::new(vec![
            Box::new(|x: &ArrayView1<f64>| (x[0] - 1.0).powi(2)),
            Box::new(|x: &ArrayView1<f64>| (x[0] + 1.0).powi(2)),
        ])
    }

    #[test]
    fn test_minimax_problem_eval() {
        let p = build_two_func_problem();
        let x = array![0.0];
        assert_eq!(p.eval_max(&x.view()), 1.0);
        let x2 = array![1.0];
        // f0(1)=0, f1(1)=4 → max=4
        assert!((p.eval_max(&x2.view()) - 4.0).abs() < 1e-9);
    }

    #[test]
    fn test_subgradient_basic() {
        let p = build_two_func_problem();
        let x0 = array![3.0];
        let config = MinimaxSolverConfig {
            max_iter: 3_000,
            tol: 1e-4,
            step_size: 0.5,
            ..Default::default()
        };
        let result = minimax_subgradient(&p, &x0.view(), &config).expect("failed to create result");
        // Optimal at x=0, minimax value = 1
        assert!(
            result.fun <= 1.5,
            "subgradient minimax value {} should be ≤ 1.5",
            result.fun
        );
        assert!(
            result.x[0].abs() < 1.0,
            "subgradient minimizer {} should be near 0",
            result.x[0]
        );
    }

    #[test]
    fn test_smooth_minimax_basic() {
        let p = build_two_func_problem();
        let x0 = array![3.0];
        let config = MinimaxSolverConfig {
            max_iter: 3_000,
            tol: 1e-5,
            step_size: 1e-2,
            smoothing_mu: 0.05,
            ..Default::default()
        };
        let result = smooth_minimax(&p, &x0.view(), &config).expect("failed to create result");
        assert!(
            result.fun <= 2.0,
            "smooth minimax value {} should be ≤ 2.0",
            result.fun
        );
    }

    #[test]
    fn test_game_theoretic_minimax() {
        let p = build_two_func_problem();
        let x0 = array![2.0];
        let config = MinimaxSolverConfig {
            max_iter: 500,
            tol: 1e-4,
            fictitious_play_iter: 500,
            ..Default::default()
        };
        let result = game_theoretic_minimax(&p, &x0.view(), 0.1, &config).expect("failed to create result");
        // Should move towards x=0
        assert!(
            result.x[0].abs() < 2.5,
            "game theoretic minimizer {} should move toward 0",
            result.x[0]
        );
        // Mixed strategy should be non-trivial (both functions played)
        assert_eq!(result.maximizer_strategy.len(), 2);
        let strat_sum: f64 = result.maximizer_strategy.iter().sum();
        assert!((strat_sum - 1.0).abs() < 1e-9, "strategy should sum to 1");
    }

    #[test]
    fn test_smooth_max_value() {
        let funcs: Vec<Box<dyn Fn(&ArrayView1<f64>) -> f64>> = vec![
            Box::new(|_x: &ArrayView1<f64>| 1.0),
            Box::new(|_x: &ArrayView1<f64>| 2.0),
            Box::new(|_x: &ArrayView1<f64>| 3.0),
        ];
        let x = array![0.0];
        let val = smooth_max_value(&funcs, &x.view(), 0.01).expect("failed to create val");
        // With small mu, smooth_max ≈ true max = 3.0
        assert!((val - 3.0).abs() < 0.1, "smooth max ≈ 3.0, got {val}");
    }

    #[test]
    fn test_bundle_method_basic() {
        let p = build_two_func_problem();
        let x0 = array![2.0];
        let config = MinimaxSolverConfig {
            max_iter: 500,
            tol: 1e-4,
            step_size: 0.5,
            ..Default::default()
        };
        let result = minimax_bundle(&p, &x0.view(), &config).expect("failed to create result");
        assert!(
            result.fun <= 5.0,
            "bundle minimax value {} should be reasonable",
            result.fun
        );
    }

    #[test]
    fn test_empty_problem_error() {
        let p = MinimaxProblem::new(vec![]);
        let x0 = array![1.0];
        let config = MinimaxSolverConfig::default();
        assert!(minimax_subgradient(&p, &x0.view(), &config).is_err());
        assert!(smooth_minimax(&p, &x0.view(), &config).is_err());
    }

    #[test]
    fn test_empty_x0_error() {
        let p = build_two_func_problem();
        let x0: Array1<f64> = Array1::zeros(0);
        let config = MinimaxSolverConfig::default();
        assert!(minimax_subgradient(&p, &x0.view(), &config).is_err());
    }
}