Skip to main content

scirs2_optimize/multilevel/
methods.rs

1//! Multi-level and multi-fidelity optimization methods
2//!
3//! Implements:
4//! - MultilevelOptimizer (coarse-to-fine)
5//! - VariableFidelity model manager
6//! - MFRBF surrogate with additive correction
7//! - TrustHierarchy (hierarchical trust regions)
8//! - MultigridOptimizer (multigrid-inspired V/W cycles)
9
10use crate::error::{OptimizeError, OptimizeResult};
11
12// ---------------------------------------------------------------------------
13// Result type
14// ---------------------------------------------------------------------------
15
16/// Result of a multi-level / multi-fidelity optimization
17#[derive(Debug, Clone)]
18pub struct MultilevelResult {
19    /// Optimal solution
20    pub x: Vec<f64>,
21    /// Function value at optimal solution (high-fidelity)
22    pub fun: f64,
23    /// Number of high-fidelity function evaluations
24    pub n_hifi_evals: usize,
25    /// Number of low-fidelity function evaluations
26    pub n_lofi_evals: usize,
27    /// Total number of iterations across all levels
28    pub n_iter: usize,
29    /// Whether the algorithm converged
30    pub success: bool,
31    /// Termination message
32    pub message: String,
33}
34
35// ---------------------------------------------------------------------------
36// FidelityLevel
37// ---------------------------------------------------------------------------
38
39/// A single fidelity level with an evaluation cost multiplier
40pub struct FidelityLevel<F>
41where
42    F: Fn(&[f64]) -> f64,
43{
44    /// Function for this fidelity level
45    pub func: F,
46    /// Relative cost (higher = more expensive)
47    pub cost: f64,
48}
49
50impl<F: Fn(&[f64]) -> f64> FidelityLevel<F> {
51    /// Create a new fidelity level
52    pub fn new(func: F, cost: f64) -> Self {
53        FidelityLevel { func, cost }
54    }
55
56    /// Evaluate the function
57    pub fn eval(&self, x: &[f64]) -> f64 {
58        (self.func)(x)
59    }
60}
61
62// ---------------------------------------------------------------------------
63// MultilevelOptions / MultilevelOptimizer
64// ---------------------------------------------------------------------------
65
66/// Options for the multi-level optimizer
67#[derive(Debug, Clone)]
68pub struct MultilevelOptions {
69    /// Maximum iterations per fidelity level
70    pub max_iter_per_level: usize,
71    /// Convergence tolerance
72    pub tol: f64,
73    /// Step size for gradient estimation
74    pub fd_step: f64,
75    /// Initial step size for gradient descent
76    pub step_size: f64,
77    /// Step shrinkage factor in line search
78    pub step_shrink: f64,
79    /// Number of coarse-level pre-smoothing steps before switching to fine
80    pub pre_smooth: usize,
81    /// Minimum improvement before promoting to next level
82    pub level_promotion_tol: f64,
83}
84
85impl Default for MultilevelOptions {
86    fn default() -> Self {
87        MultilevelOptions {
88            max_iter_per_level: 100,
89            tol: 1e-6,
90            fd_step: 1e-7,
91            step_size: 0.1,
92            step_shrink: 0.5,
93            pre_smooth: 5,
94            level_promotion_tol: 1e-3,
95        }
96    }
97}
98
99/// Multi-level (coarse-to-fine) optimizer
100///
101/// Starts at the coarsest (lowest-fidelity) level, optimizes until convergence
102/// or plateau, then promotes to the next finer level using the current solution
103/// as warm start.
104pub struct MultilevelOptimizer<F>
105where
106    F: Fn(&[f64]) -> f64,
107{
108    /// Fidelity levels ordered from coarse to fine
109    levels: Vec<FidelityLevel<F>>,
110    /// Initial point
111    x0: Vec<f64>,
112    /// Algorithm options
113    options: MultilevelOptions,
114}
115
116impl<F: Fn(&[f64]) -> f64> MultilevelOptimizer<F> {
117    /// Create a new multi-level optimizer
118    ///
119    /// Levels should be ordered from coarsest (cheapest) to finest (most expensive).
120    pub fn new(levels: Vec<FidelityLevel<F>>, x0: Vec<f64>, options: MultilevelOptions) -> Self {
121        MultilevelOptimizer {
122            levels,
123            x0,
124            options,
125        }
126    }
127
128    /// Run the multi-level optimization
129    pub fn minimize(&mut self) -> OptimizeResult<MultilevelResult> {
130        if self.levels.is_empty() {
131            return Err(OptimizeError::InvalidInput(
132                "At least one fidelity level required".to_string(),
133            ));
134        }
135        let n = self.x0.len();
136        if n == 0 {
137            return Err(OptimizeError::InvalidInput(
138                "Initial point must be non-empty".to_string(),
139            ));
140        }
141
142        let mut x = self.x0.clone();
143        let mut n_hifi = 0usize;
144        let mut n_lofi = 0usize;
145        let mut total_iter = 0usize;
146        let n_levels = self.levels.len();
147        let h = self.options.fd_step;
148
149        for (level_idx, level) in self.levels.iter().enumerate() {
150            let is_finest = level_idx == n_levels - 1;
151            let max_iter = if is_finest {
152                self.options.max_iter_per_level * 3
153            } else {
154                self.options.max_iter_per_level
155            };
156
157            let mut f_prev = level.eval(&x);
158            if is_finest {
159                n_hifi += 1;
160            } else {
161                n_lofi += 1;
162            }
163
164            for _iter in 0..max_iter {
165                total_iter += 1;
166
167                // Finite-difference gradient
168                let mut grad = vec![0.0f64; n];
169                for i in 0..n {
170                    let mut xf = x.clone();
171                    xf[i] += h;
172                    let f_fwd = level.eval(&xf);
173                    grad[i] = (f_fwd - f_prev) / h;
174                    if is_finest {
175                        n_hifi += 1;
176                    } else {
177                        n_lofi += 1;
178                    }
179                }
180
181                let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
182                if gnorm < self.options.tol {
183                    break;
184                }
185
186                // Armijo line search
187                let mut step = self.options.step_size;
188                let c1 = 1e-4;
189                let mut x_new = x.clone();
190                for _ in 0..30 {
191                    for i in 0..n {
192                        x_new[i] = x[i] - step * grad[i];
193                    }
194                    let f_new = level.eval(&x_new);
195                    if is_finest {
196                        n_hifi += 1;
197                    } else {
198                        n_lofi += 1;
199                    }
200                    if f_new <= f_prev - c1 * step * gnorm * gnorm {
201                        break;
202                    }
203                    step *= self.options.step_shrink;
204                }
205
206                let f_new = level.eval(&x_new);
207                if is_finest {
208                    n_hifi += 1;
209                } else {
210                    n_lofi += 1;
211                }
212
213                let delta = (f_new - f_prev).abs();
214                x = x_new;
215                f_prev = f_new;
216
217                if delta < self.options.tol * (1.0 + f_prev.abs()) {
218                    break;
219                }
220            }
221
222            // Check if further refinement is needed
223            if !is_finest {
224                // Only promote if improvement was significant
225                let _current_f = f_prev;
226            }
227        }
228
229        // Final evaluation at finest level
230        let final_f = self
231            .levels
232            .last()
233            .map(|l| l.eval(&x))
234            .unwrap_or(f64::INFINITY);
235        n_hifi += 1;
236
237        Ok(MultilevelResult {
238            x,
239            fun: final_f,
240            n_hifi_evals: n_hifi,
241            n_lofi_evals: n_lofi,
242            n_iter: total_iter,
243            success: true,
244            message: "Multi-level optimization completed".to_string(),
245        })
246    }
247}
248
249// ---------------------------------------------------------------------------
250// VariableFidelity
251// ---------------------------------------------------------------------------
252
253/// Options for variable fidelity optimization
254#[derive(Debug, Clone)]
255pub struct VariableFidelityOptions {
256    /// Budget: total weighted evaluation cost allowed
257    pub total_budget: f64,
258    /// Maximum iterations
259    pub max_iter: usize,
260    /// Convergence tolerance
261    pub tol: f64,
262    /// Finite difference step
263    pub fd_step: f64,
264    /// Trust region radius for switching to high fidelity
265    pub hifi_trust_radius: f64,
266}
267
268impl Default for VariableFidelityOptions {
269    fn default() -> Self {
270        VariableFidelityOptions {
271            total_budget: 1000.0,
272            max_iter: 500,
273            tol: 1e-6,
274            fd_step: 1e-7,
275            hifi_trust_radius: 0.1,
276        }
277    }
278}
279
280/// Variable fidelity model manager
281///
282/// Manages switching between low- and high-fidelity models based on
283/// a budget constraint. Uses low-fidelity exploration and high-fidelity
284/// validation/correction near candidate optima.
285pub struct VariableFidelity<FL, FH>
286where
287    FL: Fn(&[f64]) -> f64,
288    FH: Fn(&[f64]) -> f64,
289{
290    /// Low-fidelity (cheap) model
291    pub low_fi: FL,
292    /// High-fidelity (expensive) model
293    pub high_fi: FH,
294    /// Cost of one low-fidelity evaluation (in budget units)
295    pub cost_low: f64,
296    /// Cost of one high-fidelity evaluation (in budget units)
297    pub cost_high: f64,
298    /// Algorithm options
299    pub options: VariableFidelityOptions,
300}
301
302impl<FL, FH> VariableFidelity<FL, FH>
303where
304    FL: Fn(&[f64]) -> f64,
305    FH: Fn(&[f64]) -> f64,
306{
307    /// Create a new variable fidelity optimizer
308    pub fn new(
309        low_fi: FL,
310        high_fi: FH,
311        cost_low: f64,
312        cost_high: f64,
313        options: VariableFidelityOptions,
314    ) -> Self {
315        VariableFidelity {
316            low_fi,
317            high_fi,
318            cost_low,
319            cost_high,
320            options,
321        }
322    }
323
324    /// Evaluate low-fidelity model
325    pub fn eval_low(&self, x: &[f64]) -> f64 {
326        (self.low_fi)(x)
327    }
328
329    /// Evaluate high-fidelity model
330    pub fn eval_high(&self, x: &[f64]) -> f64 {
331        (self.high_fi)(x)
332    }
333
334    /// Run variable fidelity optimization
335    pub fn minimize(&self, x0: &[f64]) -> OptimizeResult<MultilevelResult> {
336        let n = x0.len();
337        if n == 0 {
338            return Err(OptimizeError::InvalidInput(
339                "Initial point must be non-empty".to_string(),
340            ));
341        }
342
343        let mut x = x0.to_vec();
344        let mut budget_used = 0.0f64;
345        let h = self.options.fd_step;
346        let mut n_hifi = 0usize;
347        let mut n_lofi = 0usize;
348        let mut n_iter = 0usize;
349
350        // Phase 1: Low-fidelity exploration
351        let mut f_low = self.eval_low(&x);
352        budget_used += self.cost_low;
353        n_lofi += 1;
354
355        while budget_used < self.options.total_budget * 0.7 && n_iter < self.options.max_iter {
356            n_iter += 1;
357
358            // FD gradient using low-fi
359            let mut grad = vec![0.0f64; n];
360            for i in 0..n {
361                let mut xf = x.clone();
362                xf[i] += h;
363                grad[i] = (self.eval_low(&xf) - f_low) / h;
364                budget_used += self.cost_low;
365                n_lofi += 1;
366            }
367
368            let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
369            if gnorm < self.options.tol {
370                break;
371            }
372
373            // Gradient step
374            let step = 0.05f64;
375            let mut x_new = vec![0.0f64; n];
376            for i in 0..n {
377                x_new[i] = x[i] - step * grad[i];
378            }
379            let f_new = self.eval_low(&x_new);
380            budget_used += self.cost_low;
381            n_lofi += 1;
382
383            if f_new < f_low {
384                x = x_new;
385                f_low = f_new;
386            } else {
387                break;
388            }
389        }
390
391        // Phase 2: High-fidelity refinement near the low-fi optimum
392        let mut f_high = self.eval_high(&x);
393        budget_used += self.cost_high;
394        n_hifi += 1;
395
396        // Additive correction: δ = f_H(x) - f_L(x)
397        let delta_correction = f_high - f_low;
398
399        while budget_used < self.options.total_budget && n_iter < self.options.max_iter * 2 {
400            n_iter += 1;
401
402            // Use corrected surrogate: f_L(x) + δ for gradient
403            let mut grad = vec![0.0f64; n];
404            for i in 0..n {
405                let mut xf = x.clone();
406                xf[i] += h;
407                // Corrected model gradient
408                grad[i] = (self.eval_low(&xf) + delta_correction - f_high) / h;
409                budget_used += self.cost_low;
410                n_lofi += 1;
411            }
412
413            let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
414            if gnorm < self.options.tol {
415                break;
416            }
417
418            // Try step
419            let step = 0.02f64;
420            let mut x_new = vec![0.0f64; n];
421            for i in 0..n {
422                x_new[i] = x[i] - step * grad[i];
423            }
424
425            // Validate with high-fidelity if within trust radius
426            let dist: f64 = x_new
427                .iter()
428                .zip(x.iter())
429                .map(|(a, b)| (a - b).powi(2))
430                .sum::<f64>()
431                .sqrt();
432
433            if dist < self.options.hifi_trust_radius {
434                let f_new_high = self.eval_high(&x_new);
435                budget_used += self.cost_high;
436                n_hifi += 1;
437                if f_new_high < f_high {
438                    x = x_new;
439                    f_high = f_new_high;
440                } else {
441                    break;
442                }
443            } else {
444                // Use low-fi acceptance
445                let f_new_low = self.eval_low(&x_new);
446                budget_used += self.cost_low;
447                n_lofi += 1;
448                if f_new_low + delta_correction < f_high {
449                    x = x_new;
450                }
451                break;
452            }
453        }
454
455        Ok(MultilevelResult {
456            x,
457            fun: f_high,
458            n_hifi_evals: n_hifi,
459            n_lofi_evals: n_lofi,
460            n_iter,
461            success: true,
462            message: format!(
463                "Variable fidelity optimization completed (budget used: {:.2})",
464                budget_used
465            ),
466        })
467    }
468}
469
470// ---------------------------------------------------------------------------
471// Linear algebra helper
472// ---------------------------------------------------------------------------
473
474/// Solve the linear system A x = b using Gaussian elimination with partial
475/// pivoting.  Returns `None` if A is singular (or nearly so).
476fn gaussian_elimination(a: &[Vec<f64>], b: &[f64]) -> Option<Vec<f64>> {
477    let n = b.len();
478    debug_assert_eq!(a.len(), n);
479
480    // Build augmented matrix [A | b]
481    let mut mat: Vec<Vec<f64>> = a
482        .iter()
483        .zip(b.iter())
484        .map(|(row, &rhs)| {
485            let mut r = row.clone();
486            r.push(rhs);
487            r
488        })
489        .collect();
490
491    for col in 0..n {
492        // Partial pivot: find row with largest absolute value in this column
493        let pivot_row = (col..n)
494            .max_by(|&r1, &r2| {
495                mat[r1][col]
496                    .abs()
497                    .partial_cmp(&mat[r2][col].abs())
498                    .unwrap_or(std::cmp::Ordering::Equal)
499            })
500            .unwrap_or(col);
501
502        mat.swap(col, pivot_row);
503
504        let pivot = mat[col][col];
505        if pivot.abs() < 1e-15 {
506            return None; // singular
507        }
508
509        // Eliminate below
510        for row in (col + 1)..n {
511            let factor = mat[row][col] / pivot;
512            for j in col..=n {
513                let val = mat[col][j] * factor;
514                mat[row][j] -= val;
515            }
516        }
517    }
518
519    // Back-substitution
520    let mut x = vec![0.0f64; n];
521    for i in (0..n).rev() {
522        let mut sum = mat[i][n];
523        for j in (i + 1)..n {
524            sum -= mat[i][j] * x[j];
525        }
526        x[i] = sum / mat[i][i];
527    }
528    Some(x)
529}
530
531// ---------------------------------------------------------------------------
532// MFRBF: Multi-Fidelity RBF Surrogate
533// ---------------------------------------------------------------------------
534
535/// Options for the MFRBF surrogate
536#[derive(Debug, Clone)]
537pub struct MfRbfOptions {
538    /// RBF kernel width parameter
539    pub length_scale: f64,
540    /// Maximum surrogate iterations
541    pub max_iter: usize,
542    /// Convergence tolerance
543    pub tol: f64,
544    /// Number of initial sample points
545    pub n_initial_samples: usize,
546    /// Budget for high-fidelity evaluations during optimization
547    pub hifi_budget: usize,
548}
549
550impl Default for MfRbfOptions {
551    fn default() -> Self {
552        MfRbfOptions {
553            length_scale: 1.0,
554            max_iter: 50,
555            tol: 1e-5,
556            n_initial_samples: 5,
557            hifi_budget: 20,
558        }
559    }
560}
561
562/// Sample point for the RBF surrogate
563#[derive(Debug, Clone)]
564struct SamplePoint {
565    x: Vec<f64>,
566    f_low: f64,
567    f_high: f64,
568}
569
570impl SamplePoint {
571    fn correction(&self) -> f64 {
572        self.f_high - self.f_low
573    }
574}
575
576/// Multi-fidelity RBF surrogate with additive correction.
577///
578/// Builds a correction surface δ(x) = f_H(x) - f_L(x) using RBF interpolation
579/// on a small number of high-fidelity samples, then optimizes f_L(x) + δ̂(x).
580pub struct MfRbf<FL, FH>
581where
582    FL: Fn(&[f64]) -> f64,
583    FH: Fn(&[f64]) -> f64,
584{
585    /// Low-fidelity model
586    pub low_fi: FL,
587    /// High-fidelity model
588    pub high_fi: FH,
589    /// Algorithm options
590    pub options: MfRbfOptions,
591    /// Sample points collected so far
592    samples: Vec<SamplePoint>,
593}
594
595impl<FL, FH> MfRbf<FL, FH>
596where
597    FL: Fn(&[f64]) -> f64,
598    FH: Fn(&[f64]) -> f64,
599{
600    /// Create a new MfRbf optimizer
601    pub fn new(low_fi: FL, high_fi: FH, options: MfRbfOptions) -> Self {
602        MfRbf {
603            low_fi,
604            high_fi,
605            options,
606            samples: Vec::new(),
607        }
608    }
609
610    fn eval_low(&self, x: &[f64]) -> f64 {
611        (self.low_fi)(x)
612    }
613
614    fn eval_high(&self, x: &[f64]) -> f64 {
615        (self.high_fi)(x)
616    }
617
618    /// Gaussian RBF kernel
619    fn rbf_kernel(&self, x: &[f64], y: &[f64]) -> f64 {
620        let dist_sq: f64 = x.iter().zip(y.iter()).map(|(a, b)| (a - b).powi(2)).sum();
621        (-dist_sq / (2.0 * self.options.length_scale.powi(2))).exp()
622    }
623
624    /// Evaluate the correction surrogate at x using RBF interpolation
625    fn eval_correction_surrogate(&self, x: &[f64]) -> f64 {
626        if self.samples.is_empty() {
627            return 0.0;
628        }
629
630        // Solve the RBF system on the fly: δ̂(x) = Σ_i w_i φ(||x - x_i||)
631        // Weights w = Φ^{-1} δ  where Φ_ij = φ(||x_i - x_j||)
632        let ns = self.samples.len();
633        let mut phi = vec![vec![0.0f64; ns]; ns];
634        for i in 0..ns {
635            for j in 0..ns {
636                phi[i][j] = self.rbf_kernel(&self.samples[i].x, &self.samples[j].x);
637            }
638            // Nugget regularization for numerical stability
639            phi[i][i] += 1e-6;
640        }
641
642        let corrections: Vec<f64> = self.samples.iter().map(|s| s.correction()).collect();
643
644        // Solve Φ w = δ using Gaussian elimination with partial pivoting.
645        // Jacobi iteration diverges when Φ is not diagonally dominant (common for
646        // clustered RBF centres), so we need a direct solver here.
647        let w = match gaussian_elimination(&phi, &corrections) {
648            Some(sol) => sol,
649            None => return 0.0,
650        };
651
652        // Evaluate surrogate: δ̂(x) = Σ w_i φ(||x - x_i||)
653        w.iter()
654            .zip(self.samples.iter())
655            .map(|(wi, si)| wi * self.rbf_kernel(x, &si.x))
656            .sum()
657    }
658
659    /// Add a sample (evaluate both fidelities)
660    pub fn add_sample(&mut self, x: Vec<f64>) {
661        let f_low = self.eval_low(&x);
662        let f_high = self.eval_high(&x);
663        self.samples.push(SamplePoint { x, f_low, f_high });
664    }
665
666    /// Evaluate the corrected surrogate: f_L(x) + δ̂(x)
667    pub fn eval_corrected(&self, x: &[f64]) -> f64 {
668        self.eval_low(x) + self.eval_correction_surrogate(x)
669    }
670
671    /// Run the MFRBF optimization
672    pub fn minimize(&mut self, x0: &[f64]) -> OptimizeResult<MultilevelResult> {
673        let n = x0.len();
674        if n == 0 {
675            return Err(OptimizeError::InvalidInput(
676                "Initial point must be non-empty".to_string(),
677            ));
678        }
679
680        let mut x = x0.to_vec();
681        let h = 1e-7f64;
682        let mut n_hifi = 0usize;
683        let mut n_lofi = 0usize;
684        let mut n_iter = 0usize;
685
686        // Initial sampling around x0
687        self.add_sample(x.clone());
688        n_hifi += 1;
689        n_lofi += 1;
690
691        for k in 1..self.options.n_initial_samples {
692            // Perturb x0 to generate diverse samples
693            let mut xs = x.clone();
694            let offset = 0.5 * (k as f64);
695            xs[k % n] += offset;
696            self.add_sample(xs);
697            n_hifi += 1;
698            n_lofi += 1;
699        }
700
701        // Optimize corrected surrogate using gradient descent with Armijo line-search.
702        // Track the best point seen to avoid returning a worse solution when the RBF
703        // correction is perturbed by newly added high-fidelity samples.
704        let mut f_prev = self.eval_corrected(&x);
705        let mut x_best = x.clone();
706        let mut f_best = f_prev;
707
708        for _iter in 0..self.options.max_iter {
709            n_iter += 1;
710
711            let mut grad = vec![0.0f64; n];
712            for i in 0..n {
713                let mut xf = x.clone();
714                xf[i] += h;
715                grad[i] = (self.eval_corrected(&xf) - f_prev) / h;
716                n_lofi += 1; // surrogate uses low-fi
717            }
718
719            let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
720            if gnorm < self.options.tol {
721                break;
722            }
723
724            // Armijo backtracking line-search: start with step=0.1 and halve until
725            // sufficient decrease is achieved (or a minimum step is reached).
726            let armijo_c = 1e-4;
727            let mut step = 0.1f64;
728            let mut x_new = vec![0.0f64; n];
729            let mut f_new = f_prev;
730            let mut descent_found = false;
731            for _ls in 0..20 {
732                for i in 0..n {
733                    x_new[i] = x[i] - step * grad[i];
734                }
735                f_new = self.eval_corrected(&x_new);
736                n_lofi += 1;
737                if f_new <= f_prev - armijo_c * step * gnorm * gnorm {
738                    descent_found = true;
739                    break;
740                }
741                step *= 0.5;
742            }
743
744            if descent_found || f_new < f_prev {
745                x = x_new.clone();
746                f_prev = f_new;
747
748                // Track the best point over all iterations so that RBF perturbations
749                // from new high-fidelity samples cannot cause us to return a worse x.
750                if f_new < f_best {
751                    x_best = x.clone();
752                    f_best = f_new;
753                }
754
755                // Refine with high-fidelity if budget allows
756                if n_hifi < self.options.hifi_budget {
757                    self.add_sample(x.clone());
758                    n_hifi += 1;
759                    n_lofi += 1;
760                    // Recompute corrected value with updated RBF
761                    f_prev = self.eval_corrected(&x);
762                    // Retain best in case corrected value jumped up after new sample
763                    if f_prev < f_best {
764                        f_best = f_prev;
765                        x_best = x.clone();
766                    }
767                }
768            }
769            // No descent: gradient norm already checked above; continue to let
770            // subsequent iterations explore further rather than terminating early.
771        }
772
773        // Final high-fidelity evaluation at the best surrogate point found
774        let final_hifi = self.eval_high(&x_best);
775        n_hifi += 1;
776
777        Ok(MultilevelResult {
778            x: x_best,
779            fun: final_hifi,
780            n_hifi_evals: n_hifi,
781            n_lofi_evals: n_lofi,
782            n_iter,
783            success: true,
784            message: "MFRBF optimization completed".to_string(),
785        })
786    }
787}
788
789// ---------------------------------------------------------------------------
790// TrustHierarchy
791// ---------------------------------------------------------------------------
792
793/// Options for the hierarchical trust region
794#[derive(Debug, Clone)]
795pub struct TrustHierarchyOptions {
796    /// Initial trust region radii per level
797    pub initial_radii: Vec<f64>,
798    /// Minimum trust region radius
799    pub min_radius: f64,
800    /// Maximum trust region radius
801    pub max_radius: f64,
802    /// Acceptance threshold (rho_min)
803    pub eta1: f64,
804    /// Good step threshold (rho_good)
805    pub eta2: f64,
806    /// Expansion factor
807    pub gamma_inc: f64,
808    /// Contraction factor
809    pub gamma_dec: f64,
810    /// Maximum iterations
811    pub max_iter: usize,
812    /// Convergence tolerance
813    pub tol: f64,
814}
815
816impl Default for TrustHierarchyOptions {
817    fn default() -> Self {
818        TrustHierarchyOptions {
819            initial_radii: vec![1.0, 0.1],
820            min_radius: 1e-8,
821            max_radius: 10.0,
822            eta1: 0.1,
823            eta2: 0.75,
824            gamma_inc: 2.0,
825            gamma_dec: 0.5,
826            max_iter: 200,
827            tol: 1e-6,
828        }
829    }
830}
831
832/// Hierarchical trust region management across fidelity levels.
833///
834/// Uses a hierarchy of trust regions, where the radius at finer levels is
835/// bounded by the radius at coarser levels. The algorithm cycles through
836/// levels, performing trust-region steps and updating radii.
837pub struct TrustHierarchy<F>
838where
839    F: Fn(&[f64]) -> f64,
840{
841    /// Models at each level (ordered coarse to fine)
842    pub levels: Vec<F>,
843    /// Algorithm options
844    pub options: TrustHierarchyOptions,
845}
846
847impl<F: Fn(&[f64]) -> f64> TrustHierarchy<F> {
848    /// Create a new hierarchical trust region solver
849    pub fn new(levels: Vec<F>, options: TrustHierarchyOptions) -> Self {
850        TrustHierarchy { levels, options }
851    }
852
853    /// Solve the optimization problem
854    pub fn minimize(&self, x0: &[f64]) -> OptimizeResult<MultilevelResult> {
855        let n = x0.len();
856        if n == 0 {
857            return Err(OptimizeError::InvalidInput(
858                "Initial point must be non-empty".to_string(),
859            ));
860        }
861        if self.levels.is_empty() {
862            return Err(OptimizeError::InvalidInput(
863                "At least one level required".to_string(),
864            ));
865        }
866
867        let n_levels = self.levels.len();
868        let mut radii: Vec<f64> = if self.options.initial_radii.len() >= n_levels {
869            self.options.initial_radii[..n_levels].to_vec()
870        } else {
871            let mut r = self.options.initial_radii.clone();
872            while r.len() < n_levels {
873                r.push(*r.last().unwrap_or(&1.0) * 0.5);
874            }
875            r
876        };
877
878        let mut x = x0.to_vec();
879        let h = 1e-7f64;
880        let mut n_evals = 0usize;
881        let mut n_iter = 0usize;
882
883        let finest_idx = n_levels - 1;
884        let mut f_cur = (self.levels[finest_idx])(&x);
885        n_evals += 1;
886
887        for _outer in 0..self.options.max_iter {
888            n_iter += 1;
889
890            // Compute gradient using finest level
891            let mut grad = vec![0.0f64; n];
892            for i in 0..n {
893                let mut xf = x.clone();
894                xf[i] += h;
895                grad[i] = ((self.levels[finest_idx])(&xf) - f_cur) / h;
896                n_evals += 1;
897            }
898
899            let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
900            if gnorm < self.options.tol {
901                break;
902            }
903
904            let radius = radii[finest_idx];
905
906            // Cauchy point step (steepest descent clipped to trust region)
907            let cauchy_step = (radius / gnorm).min(radius);
908            let mut x_trial = vec![0.0f64; n];
909            for i in 0..n {
910                x_trial[i] = x[i] - cauchy_step * grad[i] / gnorm;
911            }
912
913            // Compute actual and predicted reduction
914            let f_trial = (self.levels[finest_idx])(&x_trial);
915            n_evals += 1;
916            let actual_red = f_cur - f_trial;
917            let predicted_red = cauchy_step * gnorm; // gradient model prediction
918
919            if predicted_red.abs() < 1e-14 {
920                break;
921            }
922
923            let rho = actual_red / predicted_red;
924
925            // Update trust region radius
926            if rho < self.options.eta1 {
927                radii[finest_idx] = (radius * self.options.gamma_dec).max(self.options.min_radius);
928            } else {
929                // Accept step
930                x = x_trial;
931                f_cur = f_trial;
932
933                if rho > self.options.eta2 {
934                    radii[finest_idx] =
935                        (radius * self.options.gamma_inc).min(self.options.max_radius);
936                }
937
938                // Propagate radius changes to coarser levels
939                for l in (0..finest_idx).rev() {
940                    radii[l] = radii[l + 1] * 2.0;
941                }
942            }
943
944            if radii[finest_idx] < self.options.min_radius {
945                break;
946            }
947        }
948
949        Ok(MultilevelResult {
950            x,
951            fun: f_cur,
952            n_hifi_evals: n_evals,
953            n_lofi_evals: 0,
954            n_iter,
955            success: radii[finest_idx] >= self.options.min_radius,
956            message: "Trust hierarchy optimization completed".to_string(),
957        })
958    }
959}
960
961// ---------------------------------------------------------------------------
962// MultigridOptimizer (V/W cycle)
963// ---------------------------------------------------------------------------
964
965/// Options for the multigrid optimizer
966#[derive(Debug, Clone)]
967pub struct MultigridOptions {
968    /// Number of V-cycles to perform
969    pub n_cycles: usize,
970    /// Number of smoothing steps per level visit
971    pub n_smooth: usize,
972    /// Type of cycle: 'V' or 'W'
973    pub cycle_type: CycleType,
974    /// Convergence tolerance
975    pub tol: f64,
976    /// Gradient step size for smoothing
977    pub smooth_step: f64,
978    /// Restriction/interpolation factor between levels
979    pub coarsening_factor: f64,
980}
981
982/// Multigrid cycle type
983#[derive(Debug, Clone, Copy, PartialEq, Eq)]
984pub enum CycleType {
985    /// V-cycle (one visit per level)
986    V,
987    /// W-cycle (two visits at coarser levels)
988    W,
989}
990
991impl Default for MultigridOptions {
992    fn default() -> Self {
993        MultigridOptions {
994            n_cycles: 20,
995            n_smooth: 3,
996            cycle_type: CycleType::V,
997            tol: 1e-6,
998            smooth_step: 0.1,
999            coarsening_factor: 2.0,
1000        }
1001    }
1002}
1003
1004/// Multigrid-inspired optimizer using a hierarchy of approximation levels.
1005///
1006/// The "grid levels" here represent different approximation granularities.
1007/// Restriction maps a fine iterate to a coarser space; prolongation
1008/// (interpolation) maps the coarse correction back to the fine space.
1009///
1010/// On each smoothing step, gradient descent is applied. The multigrid
1011/// philosophy avoids slow convergence of fine-level iterations by first
1012/// making progress on coarser levels.
1013pub struct MultigridOptimizer<F>
1014where
1015    F: Fn(&[f64]) -> f64,
1016{
1017    /// Objective function (same function evaluated at different scales)
1018    pub func: F,
1019    /// Dimensionality at finest level
1020    pub n_fine: usize,
1021    /// Number of multigrid levels
1022    pub n_levels: usize,
1023    /// Algorithm options
1024    pub options: MultigridOptions,
1025}
1026
1027impl<F: Fn(&[f64]) -> f64> MultigridOptimizer<F> {
1028    /// Create a new multigrid optimizer
1029    pub fn new(func: F, n_fine: usize, n_levels: usize, options: MultigridOptions) -> Self {
1030        MultigridOptimizer {
1031            func,
1032            n_fine,
1033            n_levels,
1034            options,
1035        }
1036    }
1037
1038    /// Evaluate function
1039    fn eval(&self, x: &[f64]) -> f64 {
1040        (self.func)(x)
1041    }
1042
1043    /// Restrict fine vector to coarser level (simple averaging / decimation)
1044    fn restrict(&self, x_fine: &[f64], coarse_n: usize) -> Vec<f64> {
1045        let fine_n = x_fine.len();
1046        if coarse_n == 0 || fine_n == 0 {
1047            return vec![];
1048        }
1049        let mut x_coarse = vec![0.0f64; coarse_n];
1050        for i in 0..coarse_n {
1051            let ratio = fine_n as f64 / coarse_n as f64;
1052            let start = (i as f64 * ratio) as usize;
1053            let end = ((i + 1) as f64 * ratio) as usize;
1054            let end = end.min(fine_n);
1055            if start >= end {
1056                x_coarse[i] = x_fine[start.min(fine_n - 1)];
1057            } else {
1058                let sum: f64 = x_fine[start..end].iter().sum();
1059                x_coarse[i] = sum / (end - start) as f64;
1060            }
1061        }
1062        x_coarse
1063    }
1064
1065    /// Prolongate (interpolate) coarse correction to fine level
1066    fn prolongate(&self, correction_coarse: &[f64], fine_n: usize) -> Vec<f64> {
1067        let coarse_n = correction_coarse.len();
1068        if coarse_n == 0 || fine_n == 0 {
1069            return vec![0.0f64; fine_n];
1070        }
1071        let mut fine = vec![0.0f64; fine_n];
1072        for i in 0..fine_n {
1073            let t = i as f64 / (fine_n - 1).max(1) as f64;
1074            let coarse_pos = t * (coarse_n - 1).max(0) as f64;
1075            let coarse_i = coarse_pos as usize;
1076            let frac = coarse_pos - coarse_i as f64;
1077            let v0 = correction_coarse[coarse_i.min(coarse_n - 1)];
1078            let v1 = if coarse_i + 1 < coarse_n {
1079                correction_coarse[coarse_i + 1]
1080            } else {
1081                v0
1082            };
1083            fine[i] = v0 * (1.0 - frac) + v1 * frac;
1084        }
1085        fine
1086    }
1087
1088    /// Perform `n_smooth` gradient descent steps at current level
1089    fn smooth(&self, x: &[f64], n_smooth: usize, step: f64, nfev: &mut usize) -> Vec<f64> {
1090        let n = x.len();
1091        let h = 1e-7f64;
1092        let mut xc = x.to_vec();
1093
1094        for _ in 0..n_smooth {
1095            let f0 = self.eval(&xc);
1096            *nfev += 1;
1097            let mut grad = vec![0.0f64; n];
1098            for i in 0..n {
1099                let mut xf = xc.clone();
1100                xf[i] += h;
1101                grad[i] = (self.eval(&xf) - f0) / h;
1102                *nfev += 1;
1103            }
1104            let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
1105            if gnorm < 1e-10 {
1106                break;
1107            }
1108            for i in 0..n {
1109                xc[i] -= step * grad[i];
1110            }
1111        }
1112        xc
1113    }
1114
1115    /// Perform one V-cycle starting at `level` (0 = finest)
1116    fn v_cycle(&self, x: Vec<f64>, level: usize, nfev: &mut usize) -> Vec<f64> {
1117        let n = x.len();
1118
1119        // Pre-smoothing
1120        let mut x = self.smooth(&x, self.options.n_smooth, self.options.smooth_step, nfev);
1121
1122        if level < self.n_levels - 1 && n > 1 {
1123            // Restrict to coarser level
1124            let coarse_n = (n as f64 / self.options.coarsening_factor).ceil() as usize;
1125            let coarse_n = coarse_n.max(1);
1126            let x_coarse = self.restrict(&x, coarse_n);
1127
1128            // Recurse at coarser level
1129            let x_coarse_smooth = self.v_cycle(x_coarse.clone(), level + 1, nfev);
1130
1131            // Compute correction
1132            let correction: Vec<f64> = x_coarse_smooth
1133                .iter()
1134                .zip(x_coarse.iter())
1135                .map(|(a, b)| a - b)
1136                .collect();
1137
1138            // Prolongate correction back to fine level
1139            let correction_fine = self.prolongate(&correction, n);
1140
1141            // Apply correction
1142            for i in 0..n {
1143                x[i] += correction_fine[i];
1144            }
1145        }
1146
1147        // Post-smoothing
1148        self.smooth(&x, self.options.n_smooth, self.options.smooth_step, nfev)
1149    }
1150
1151    /// Perform one W-cycle (two coarse-level visits)
1152    fn w_cycle(&self, x: Vec<f64>, level: usize, nfev: &mut usize) -> Vec<f64> {
1153        let n = x.len();
1154        let mut x = self.smooth(&x, self.options.n_smooth, self.options.smooth_step, nfev);
1155
1156        if level < self.n_levels - 1 && n > 1 {
1157            let coarse_n = (n as f64 / self.options.coarsening_factor).ceil() as usize;
1158            let coarse_n = coarse_n.max(1);
1159            let x_coarse0 = self.restrict(&x, coarse_n);
1160
1161            // First coarse-level solve
1162            let x_coarse1 = self.w_cycle(x_coarse0.clone(), level + 1, nfev);
1163
1164            // Second coarse-level solve (W-cycle difference from V-cycle)
1165            let x_coarse2 = self.w_cycle(x_coarse1.clone(), level + 1, nfev);
1166
1167            let correction: Vec<f64> = x_coarse2
1168                .iter()
1169                .zip(x_coarse0.iter())
1170                .map(|(a, b)| a - b)
1171                .collect();
1172            let correction_fine = self.prolongate(&correction, n);
1173            for i in 0..n {
1174                x[i] += correction_fine[i];
1175            }
1176        }
1177
1178        self.smooth(&x, self.options.n_smooth, self.options.smooth_step, nfev)
1179    }
1180
1181    /// Run the multigrid optimizer
1182    pub fn minimize(&self, x0: &[f64]) -> OptimizeResult<MultilevelResult> {
1183        if x0.is_empty() {
1184            return Err(OptimizeError::InvalidInput(
1185                "Initial point must be non-empty".to_string(),
1186            ));
1187        }
1188
1189        let mut x = x0.to_vec();
1190        let mut n_evals = 0usize;
1191        let mut n_iter = 0usize;
1192        let mut f_prev = self.eval(&x);
1193        n_evals += 1;
1194
1195        for _cycle in 0..self.options.n_cycles {
1196            n_iter += 1;
1197            x = match self.options.cycle_type {
1198                CycleType::V => self.v_cycle(x, 0, &mut n_evals),
1199                CycleType::W => self.w_cycle(x, 0, &mut n_evals),
1200            };
1201            let f_new = self.eval(&x);
1202            n_evals += 1;
1203            let delta = (f_new - f_prev).abs();
1204            if delta < self.options.tol * (1.0 + f_prev.abs()) {
1205                f_prev = f_new;
1206                break;
1207            }
1208            f_prev = f_new;
1209        }
1210
1211        Ok(MultilevelResult {
1212            x,
1213            fun: f_prev,
1214            n_hifi_evals: n_evals,
1215            n_lofi_evals: 0,
1216            n_iter,
1217            success: true,
1218            message: format!(
1219                "Multigrid ({:?}-cycle) optimization completed",
1220                self.options.cycle_type
1221            ),
1222        })
1223    }
1224}
1225
1226// ---------------------------------------------------------------------------
1227// Tests
1228// ---------------------------------------------------------------------------
1229
1230#[cfg(test)]
1231mod tests {
1232    use super::*;
1233
1234    fn quadratic(x: &[f64]) -> f64 {
1235        x.iter().map(|xi| xi.powi(2)).sum()
1236    }
1237
1238    fn quadratic_shifted(x: &[f64]) -> f64 {
1239        x.iter().map(|xi| (xi - 0.2).powi(2)).sum()
1240    }
1241
1242    #[test]
1243    fn test_multilevel_optimizer_basic() {
1244        let levels = vec![
1245            FidelityLevel::new(quadratic_shifted as fn(&[f64]) -> f64, 1.0),
1246            FidelityLevel::new(quadratic as fn(&[f64]) -> f64, 10.0),
1247        ];
1248        let mut optimizer = MultilevelOptimizer::new(
1249            levels,
1250            vec![1.0, 1.0],
1251            MultilevelOptions {
1252                max_iter_per_level: 200,
1253                tol: 1e-5,
1254                ..Default::default()
1255            },
1256        );
1257        let result = optimizer.minimize().expect("failed to create result");
1258        assert!(result.fun < 0.1, "Expected fun < 0.1, got {}", result.fun);
1259    }
1260
1261    #[test]
1262    fn test_variable_fidelity_basic() {
1263        let vf = VariableFidelity::new(
1264            quadratic_shifted,
1265            quadratic,
1266            1.0,
1267            10.0,
1268            VariableFidelityOptions {
1269                total_budget: 200.0,
1270                max_iter: 100,
1271                tol: 1e-5,
1272                ..Default::default()
1273            },
1274        );
1275        let result = vf.minimize(&[1.5, 1.5]).expect("failed to create result");
1276        assert!(result.fun < 0.5, "Expected fun < 0.5, got {}", result.fun);
1277    }
1278
1279    #[test]
1280    fn test_trust_hierarchy_basic() {
1281        let levels: Vec<fn(&[f64]) -> f64> = vec![quadratic_shifted, quadratic];
1282        let th = TrustHierarchy::new(
1283            levels,
1284            TrustHierarchyOptions {
1285                initial_radii: vec![2.0, 1.0],
1286                max_iter: 200,
1287                tol: 1e-5,
1288                ..Default::default()
1289            },
1290        );
1291        let result = th.minimize(&[1.0, 1.0]).expect("failed to create result");
1292        assert!(result.fun < 0.1, "Expected fun < 0.1, got {}", result.fun);
1293    }
1294
1295    #[test]
1296    fn test_multigrid_v_cycle() {
1297        let optimizer = MultigridOptimizer::new(
1298            quadratic,
1299            2,
1300            2,
1301            MultigridOptions {
1302                n_cycles: 30,
1303                n_smooth: 5,
1304                cycle_type: CycleType::V,
1305                tol: 1e-5,
1306                smooth_step: 0.1,
1307                coarsening_factor: 2.0,
1308            },
1309        );
1310        let result = optimizer
1311            .minimize(&[1.0, 1.0])
1312            .expect("failed to create result");
1313        assert!(result.fun < 0.1, "Expected fun < 0.1, got {}", result.fun);
1314    }
1315
1316    #[test]
1317    fn test_multigrid_w_cycle() {
1318        let optimizer = MultigridOptimizer::new(
1319            quadratic,
1320            2,
1321            2,
1322            MultigridOptions {
1323                n_cycles: 30,
1324                n_smooth: 5,
1325                cycle_type: CycleType::W,
1326                tol: 1e-5,
1327                smooth_step: 0.1,
1328                coarsening_factor: 2.0,
1329            },
1330        );
1331        let result = optimizer
1332            .minimize(&[1.0, 1.0])
1333            .expect("failed to create result");
1334        assert!(result.fun < 0.1, "Expected fun < 0.1, got {}", result.fun);
1335    }
1336
1337    #[test]
1338    fn test_mfrbf_basic() {
1339        let mut mfrbf = MfRbf::new(
1340            quadratic_shifted,
1341            quadratic,
1342            MfRbfOptions {
1343                n_initial_samples: 3,
1344                hifi_budget: 10,
1345                max_iter: 50,
1346                tol: 1e-4,
1347                length_scale: 1.0,
1348            },
1349        );
1350        let result = mfrbf
1351            .minimize(&[1.0, 1.0])
1352            .expect("failed to create result");
1353        assert!(result.fun < 1.0, "Expected fun < 1.0, got {}", result.fun);
1354    }
1355}