Skip to main content

scirs2_optimize/unconstrained/
mod.rs

1//! Unconstrained optimization algorithms
2//!
3//! This module provides various algorithms for unconstrained minimization problems.
4
5use crate::error::OptimizeError;
6use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
7use std::fmt;
8
9/// Method for computing the Jacobian (gradient) of the objective function
10pub enum Jacobian<'a> {
11    /// Compute gradient using finite differences
12    FiniteDiff,
13    /// User-provided gradient function
14    Function(Box<dyn Fn(&ArrayView1<f64>) -> Array1<f64> + 'a>),
15}
16
17impl<'a> std::fmt::Debug for Jacobian<'a> {
18    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
19        match self {
20            Jacobian::FiniteDiff => write!(f, "Jacobian::FiniteDiff"),
21            Jacobian::Function(_) => write!(f, "Jacobian::Function(<function>)"),
22        }
23    }
24}
25
26// Sub-modules
27pub mod adaptive_convergence;
28pub mod advanced_line_search;
29pub mod bfgs;
30pub mod callback_diagnostics;
31pub(crate) mod cg_gpu;
32pub mod conjugate_gradient;
33pub mod convergence_diagnostics;
34pub mod efficient_sparse;
35pub mod lbfgs;
36pub(crate) mod lbfgs_gpu;
37pub mod lbfgsb;
38pub mod line_search;
39pub mod memory_efficient;
40pub mod memory_efficient_sparse;
41pub mod nelder_mead;
42pub mod newton;
43pub mod newton_cg;
44pub(crate) mod newton_gpu;
45pub mod powell;
46pub mod quasi_newton;
47pub mod result;
48pub mod robust_convergence;
49pub mod simd_bfgs;
50pub mod sparse_optimization;
51pub mod strong_wolfe;
52pub mod subspace_methods;
53pub mod truncated_newton;
54pub mod trust_region;
55pub mod utils;
56
57// Import result type
58pub use result::OptimizeResult;
59
60// Re-export commonly used items
61pub use adaptive_convergence::{
62    check_convergence_adaptive, create_adaptive_options_for_problem, AdaptationStats,
63    AdaptiveToleranceOptions, AdaptiveToleranceState, ConvergenceStatus,
64};
65pub use advanced_line_search::{
66    advanced_line_search, create_non_monotone_state, AdvancedLineSearchOptions,
67    InterpolationStrategy, LineSearchMethod, LineSearchResult, LineSearchStats,
68};
69pub use bfgs::{minimize_bfgs, minimize_bfgs_no_grad, minimize_bfgs_with_jacobian};
70pub use callback_diagnostics::{
71    minimize_with_diagnostics, optimize_with_diagnostics, CallbackInfo, CallbackResult,
72    DiagnosticOptimizer, OptimizationCallback,
73};
74pub use conjugate_gradient::{
75    minimize_conjugate_gradient, minimize_conjugate_gradient_with_jacobian,
76};
77pub use convergence_diagnostics::{
78    ConvergenceDiagnostics, DiagnosticCollector, DiagnosticOptions, DiagnosticWarning,
79    ExportFormat, IterationDiagnostic, LineSearchDiagnostic, PerformanceMetrics, ProblemAnalysis,
80    ProblemDifficulty, WarningSeverity,
81};
82pub use efficient_sparse::{
83    minimize_efficient_sparse_newton, EfficientSparseOptions, SparsityInfo,
84};
85pub use lbfgs::{minimize_lbfgs, minimize_lbfgsb};
86pub use memory_efficient::{
87    create_memory_efficient_optimizer, minimize_memory_efficient_lbfgs, MemoryOptions,
88};
89pub use memory_efficient_sparse::{
90    create_advanced_scale_optimizer, minimize_advanced_scale, AdvancedScaleOptions,
91};
92pub use nelder_mead::minimize_nelder_mead;
93pub use newton::minimize_newton_cg;
94pub use powell::minimize_powell;
95pub use quasi_newton::{minimize_dfp, minimize_quasi_newton, minimize_sr1, UpdateFormula};
96pub use robust_convergence::{
97    create_robust_options_for_problem, RobustConvergenceOptions, RobustConvergenceResult,
98    RobustConvergenceState,
99};
100pub use simd_bfgs::{minimize_simd_bfgs, minimize_simd_bfgs_default, SimdBfgsOptions};
101pub use sparse_optimization::{
102    auto_detect_sparsity, minimize_sparse_bfgs, SparseOptimizationOptions,
103};
104pub use strong_wolfe::{
105    create_strong_wolfe_options_for_method, strong_wolfe_line_search, StrongWolfeOptions,
106    StrongWolfeResult,
107};
108pub use subspace_methods::{
109    minimize_adaptive_subspace, minimize_block_coordinate_descent,
110    minimize_cyclical_coordinate_descent, minimize_random_coordinate_descent,
111    minimize_random_subspace, minimize_subspace, SubspaceMethod, SubspaceOptions,
112};
113pub use truncated_newton::{
114    minimize_truncated_newton, minimize_trust_region_newton, Preconditioner, TruncatedNewtonOptions,
115};
116pub use trust_region::{
117    cauchy_point, dogleg_step, minimize_trust_exact, minimize_trust_krylov, minimize_trust_ncg,
118    solve_trust_subproblem, trust_region_minimize, TrustRegionConfig, TrustRegionResult,
119};
120
121/// Optimization methods for unconstrained minimization.
122#[derive(Debug, Clone, Copy)]
123pub enum Method {
124    /// Nelder-Mead simplex method
125    NelderMead,
126    /// Powell's method
127    Powell,
128    /// Conjugate gradient method
129    CG,
130    /// BFGS quasi-Newton method
131    BFGS,
132    /// SR1 quasi-Newton method
133    SR1,
134    /// DFP quasi-Newton method
135    DFP,
136    /// Limited-memory BFGS method
137    LBFGS,
138    /// Limited-memory BFGS method with bounds support
139    LBFGSB,
140    /// Newton's method with conjugate gradient solver
141    NewtonCG,
142    /// Trust-region Newton method with conjugate gradient solver
143    TrustNCG,
144    /// Trust-region method with Krylov subproblem solver
145    TrustKrylov,
146    /// Trust-region method with exact subproblem solver
147    TrustExact,
148    /// Truncated Newton method
149    TruncatedNewton,
150    /// Trust-region Newton method with truncated CG
151    TrustRegionNewton,
152    /// Trust-region dogleg method (Cauchy point + dogleg step)
153    TrustRegionDogleg,
154}
155
156/// Bounds for optimization variables
157#[derive(Debug, Clone)]
158pub struct Bounds {
159    /// Lower bounds
160    pub lower: Vec<Option<f64>>,
161    /// Upper bounds
162    pub upper: Vec<Option<f64>>,
163}
164
165/// Options for optimization algorithms
166#[derive(Debug, Clone)]
167pub struct Options {
168    /// Maximum number of iterations
169    pub max_iter: usize,
170    /// Maximum number of function evaluations
171    pub max_fev: Option<usize>,
172    /// Function tolerance for convergence
173    pub ftol: f64,
174    /// Change tolerance for convergence
175    pub xtol: f64,
176    /// Gradient tolerance for convergence
177    pub gtol: f64,
178    /// Initial step size
179    pub initial_step: Option<f64>,
180    /// Maximum step size for line search
181    pub maxstep: Option<f64>,
182    /// Whether to use finite differences for gradient
183    pub finite_diff: bool,
184    /// Finite difference step size
185    pub eps: f64,
186    /// Initial trust-region radius for trust-region methods
187    pub trust_radius: Option<f64>,
188    /// Maximum trust-region radius for trust-region methods
189    pub max_trust_radius: Option<f64>,
190    /// Minimum trust-region radius for trust-region methods
191    pub min_trust_radius: Option<f64>,
192    /// Tolerance for the trust-region subproblem
193    pub trust_tol: Option<f64>,
194    /// Maximum iterations for trust-region subproblem
195    pub trust_max_iter: Option<usize>,
196    /// Threshold for accepting a step in the trust-region method
197    pub trust_eta: Option<f64>,
198    /// Bounds constraints for variables
199    pub bounds: Option<Bounds>,
200    /// Enable GPU acceleration in L-BFGS, Conjugate Gradient, and Newton-CG when
201    /// `n_params >= threshold` and a wgpu adapter is present.  Per-method thresholds:
202    /// L-BFGS: 4096 (`GPU_LBFGS_THRESHOLD`), CG: 4096 (`GPU_CG_THRESHOLD`),
203    /// Newton-CG: 4096 (`GPU_NEWTON_THRESHOLD`).
204    ///
205    /// Requires the `gpu` feature.  Defaults to `true` (auto-detect). Set to `false` to force
206    /// the CPU path regardless of hardware.
207    pub use_gpu: bool,
208    /// Override the GPU threshold for testing purposes.  When `Some(n)`, GPU dispatch is
209    /// attempted if `n_params >= n` (and `use_gpu` is `true` and an adapter is present).
210    /// `None` uses the per-method default threshold (4096 for L-BFGS, CG, and Newton-CG).
211    pub gpu_threshold_override: Option<usize>,
212}
213
214// Implement Display for Method
215impl fmt::Display for Method {
216    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
217        match self {
218            Method::NelderMead => write!(f, "Nelder-Mead"),
219            Method::Powell => write!(f, "Powell"),
220            Method::CG => write!(f, "Conjugate Gradient"),
221            Method::BFGS => write!(f, "BFGS"),
222            Method::SR1 => write!(f, "SR1"),
223            Method::DFP => write!(f, "DFP"),
224            Method::LBFGS => write!(f, "L-BFGS"),
225            Method::LBFGSB => write!(f, "L-BFGS-B"),
226            Method::NewtonCG => write!(f, "Newton-CG"),
227            Method::TrustNCG => write!(f, "Trust-NCG"),
228            Method::TrustKrylov => write!(f, "Trust-Krylov"),
229            Method::TrustExact => write!(f, "Trust-Exact"),
230            Method::TruncatedNewton => write!(f, "Truncated Newton"),
231            Method::TrustRegionNewton => write!(f, "Trust-Region Newton"),
232            Method::TrustRegionDogleg => write!(f, "Trust-Region Dogleg"),
233        }
234    }
235}
236
237// Implement Default for Options
238impl Default for Options {
239    fn default() -> Self {
240        Options {
241            max_iter: 1000,
242            max_fev: None,
243            ftol: 1e-8,
244            xtol: 1e-8,
245            gtol: 1e-5,
246            initial_step: None,
247            maxstep: None,
248            finite_diff: false,
249            eps: 1.4901161193847656e-8,
250            trust_radius: Some(1.0),
251            max_trust_radius: Some(100.0),
252            min_trust_radius: Some(1e-10),
253            trust_tol: Some(1e-8),
254            trust_max_iter: Some(100),
255            trust_eta: Some(0.1),
256            bounds: None,
257            use_gpu: true,
258            gpu_threshold_override: None,
259        }
260    }
261}
262
263// Implement Bounds methods
264impl Bounds {
265    /// Create new bounds from arrays
266    pub fn new(bounds: &[(Option<f64>, Option<f64>)]) -> Self {
267        let (lower, upper): (Vec<_>, Vec<_>) = bounds.iter().cloned().unzip();
268        Self { lower, upper }
269    }
270
271    /// Create bounds from vectors
272    pub fn from_vecs(lb: Vec<Option<f64>>, ub: Vec<Option<f64>>) -> Result<Self, OptimizeError> {
273        if lb.len() != ub.len() {
274            return Err(OptimizeError::ValueError(
275                "Lower and upper bounds must have the same length".to_string(),
276            ));
277        }
278
279        for (l, u) in lb.iter().zip(ub.iter()) {
280            if let (Some(l_val), Some(u_val)) = (l, u) {
281                if l_val > u_val {
282                    return Err(OptimizeError::ValueError(
283                        "Lower bound must be less than or equal to upper bound".to_string(),
284                    ));
285                }
286            }
287        }
288
289        Ok(Self {
290            lower: lb,
291            upper: ub,
292        })
293    }
294
295    /// Check if point is feasible
296    pub fn is_feasible(&self, x: &[f64]) -> bool {
297        if x.len() != self.lower.len() {
298            return false;
299        }
300
301        for (&xi, (&lb, &ub)) in x.iter().zip(self.lower.iter().zip(self.upper.iter())) {
302            if let Some(l) = lb {
303                if xi < l {
304                    return false;
305                }
306            }
307            if let Some(u) = ub {
308                if xi > u {
309                    return false;
310                }
311            }
312        }
313        true
314    }
315
316    /// Project point onto feasible region
317    pub fn project(&self, x: &mut [f64]) {
318        for (xi, (&lb, &ub)) in x.iter_mut().zip(self.lower.iter().zip(self.upper.iter())) {
319            if let Some(l) = lb {
320                if *xi < l {
321                    *xi = l;
322                }
323            }
324            if let Some(u) = ub {
325                if *xi > u {
326                    *xi = u;
327                }
328            }
329        }
330    }
331
332    /// Check if bounds are active
333    pub fn has_bounds(&self) -> bool {
334        self.lower.iter().any(|b| b.is_some()) || self.upper.iter().any(|b| b.is_some())
335    }
336}
337
338/// Main minimize function for unconstrained optimization
339#[allow(dead_code)]
340pub fn minimize<F, S>(
341    fun: F,
342    x0: &[f64],
343    method: Method,
344    options: Option<Options>,
345) -> Result<OptimizeResult<S>, OptimizeError>
346where
347    F: FnMut(&ArrayView1<f64>) -> S + Clone,
348    S: Into<f64> + Clone + From<f64>,
349{
350    let options = &options.unwrap_or_default();
351    let x0 = Array1::from_vec(x0.to_vec());
352
353    // Check initial point feasibility if bounds are provided
354    if let Some(ref bounds) = options.bounds {
355        let x0_slice = x0.as_slice().ok_or_else(|| {
356            OptimizeError::ComputationError("Failed to get slice for feasibility check".to_string())
357        })?;
358        if !bounds.is_feasible(x0_slice) {
359            return Err(OptimizeError::ValueError(
360                "Initial point is not feasible".to_string(),
361            ));
362        }
363    }
364
365    match method {
366        Method::NelderMead => nelder_mead::minimize_nelder_mead(fun, x0, options),
367        Method::Powell => powell::minimize_powell(fun, x0, options),
368        Method::CG => conjugate_gradient::minimize_conjugate_gradient(
369            fun,
370            None::<fn(&ArrayView1<f64>) -> Array1<f64>>,
371            x0,
372            options,
373        ),
374        Method::BFGS => bfgs::minimize_bfgs(
375            fun,
376            None::<fn(&ArrayView1<f64>) -> Array1<f64>>,
377            x0,
378            options,
379        ),
380        Method::SR1 => quasi_newton::minimize_sr1(fun, x0, options),
381        Method::DFP => quasi_newton::minimize_dfp(fun, x0, options),
382        Method::LBFGS => lbfgs::minimize_lbfgs(
383            fun,
384            None::<fn(&ArrayView1<f64>) -> Array1<f64>>,
385            x0,
386            options,
387        ),
388        Method::LBFGSB => lbfgs::minimize_lbfgsb(
389            fun,
390            None::<fn(&ArrayView1<f64>) -> Array1<f64>>,
391            x0,
392            options,
393        ),
394        Method::NewtonCG => newton::minimize_newton_cg(fun, x0, options),
395        Method::TrustNCG => trust_region::minimize_trust_ncg(fun, x0, options),
396        Method::TrustKrylov => trust_region::minimize_trust_krylov(fun, x0, options),
397        Method::TrustExact => trust_region::minimize_trust_exact(fun, x0, options),
398        Method::TruncatedNewton => truncated_newton_wrapper(fun, x0, options),
399        Method::TrustRegionNewton => trust_region_newton_wrapper(fun, x0, options),
400        Method::TrustRegionDogleg => trust_region_dogleg_wrapper(fun, x0, options),
401    }
402}
403
404/// Wrapper function for truncated Newton method
405#[allow(dead_code)]
406fn truncated_newton_wrapper<F, S>(
407    mut fun: F,
408    x0: Array1<f64>,
409    options: &Options,
410) -> Result<OptimizeResult<S>, OptimizeError>
411where
412    F: FnMut(&ArrayView1<f64>) -> S + Clone,
413    S: Into<f64> + Clone + From<f64>,
414{
415    let fun_f64 = move |x: &ArrayView1<f64>| fun(x).into();
416
417    let truncated_options = TruncatedNewtonOptions {
418        max_iter: options.max_iter,
419        tol: options.gtol,
420        max_cg_iter: options.trust_max_iter.unwrap_or(100),
421        cg_tol: options.trust_tol.unwrap_or(0.1),
422        ..Default::default()
423    };
424
425    // Convert result back to generic type
426    let result = truncated_newton::minimize_truncated_newton(
427        fun_f64,
428        None::<fn(&ArrayView1<f64>) -> Array1<f64>>,
429        x0,
430        Some(truncated_options),
431    )?;
432
433    Ok(OptimizeResult {
434        x: result.x,
435        fun: result.fun.into(),
436        nit: result.nit,
437        func_evals: result.func_evals,
438        nfev: result.nfev,
439        jacobian: result.jacobian,
440        hessian: result.hessian,
441        success: result.success,
442        message: result.message,
443    })
444}
445
446/// Wrapper function for trust-region Newton method
447#[allow(dead_code)]
448fn trust_region_newton_wrapper<F, S>(
449    mut fun: F,
450    x0: Array1<f64>,
451    options: &Options,
452) -> Result<OptimizeResult<S>, OptimizeError>
453where
454    F: FnMut(&ArrayView1<f64>) -> S + Clone,
455    S: Into<f64> + Clone + From<f64>,
456{
457    let fun_f64 = move |x: &ArrayView1<f64>| fun(x).into();
458
459    let truncated_options = TruncatedNewtonOptions {
460        max_iter: options.max_iter,
461        tol: options.gtol,
462        max_cg_iter: options.trust_max_iter.unwrap_or(100),
463        cg_tol: options.trust_tol.unwrap_or(0.1),
464        trust_radius: options.trust_radius,
465        ..Default::default()
466    };
467
468    // Convert result back to generic type
469    let result = truncated_newton::minimize_trust_region_newton(
470        fun_f64,
471        None::<fn(&ArrayView1<f64>) -> Array1<f64>>,
472        x0,
473        Some(truncated_options),
474    )?;
475
476    Ok(OptimizeResult {
477        x: result.x,
478        fun: result.fun.into(),
479        nit: result.nit,
480        func_evals: result.func_evals,
481        nfev: result.nfev,
482        jacobian: result.jacobian,
483        hessian: result.hessian,
484        success: result.success,
485        message: result.message,
486    })
487}
488
489/// Wrapper function for trust-region dogleg method
490#[allow(dead_code)]
491fn trust_region_dogleg_wrapper<F, S>(
492    mut fun: F,
493    x0: Array1<f64>,
494    options: &Options,
495) -> Result<OptimizeResult<S>, OptimizeError>
496where
497    F: FnMut(&ArrayView1<f64>) -> S + Clone,
498    S: Into<f64> + Clone + From<f64>,
499{
500    let mut fun_f64 = move |x: &ArrayView1<f64>| fun(x).into();
501
502    let config = TrustRegionConfig {
503        initial_radius: options.trust_radius.unwrap_or(1.0),
504        max_radius: options.max_trust_radius.unwrap_or(100.0),
505        max_iter: options.max_iter,
506        tolerance: options.gtol,
507        ftol: options.ftol,
508        eps: options.eps,
509        min_radius: options.min_trust_radius.unwrap_or(1e-14),
510        ..Default::default()
511    };
512
513    let result = trust_region::trust_region_minimize(
514        &mut fun_f64,
515        None::<fn(&ArrayView1<f64>) -> Array1<f64>>,
516        None::<fn(&ArrayView1<f64>) -> Array2<f64>>,
517        x0,
518        Some(config),
519    )?;
520
521    Ok(OptimizeResult {
522        x: result.x,
523        fun: S::from(result.f_val),
524        nit: result.n_iter,
525        func_evals: result.n_fev,
526        nfev: result.n_fev,
527        jacobian: None,
528        hessian: None,
529        success: result.converged,
530        message: result.message,
531    })
532}
533
534#[cfg(test)]
535mod tests {
536    use super::*;
537    use approx::assert_abs_diff_eq;
538
539    #[test]
540    fn test_simple_quadratic() {
541        let quadratic = |x: &ArrayView1<f64>| -> f64 { x[0] * x[0] + x[1] * x[1] };
542
543        let x0 = vec![1.0, 1.0];
544        let result = minimize(quadratic, &x0, Method::BFGS, None);
545        assert!(result.is_ok());
546
547        let result = result.expect("Operation failed");
548        assert!(result.success);
549        assert_abs_diff_eq!(result.x[0], 0.0, epsilon = 1e-6);
550        assert_abs_diff_eq!(result.x[1], 0.0, epsilon = 1e-6);
551    }
552}