Skip to main content

apex_solver/optimizer/
gauss_newton.rs

1//! Gauss-Newton optimization algorithm implementation.
2//!
3//! The Gauss-Newton method is a fundamental iterative algorithm for solving nonlinear least squares problems
4//! of the form:
5//!
6//! ```text
7//! min f(x) = ½||r(x)||² = ½Σᵢ rᵢ(x)²
8//! ```
9//!
10//! where `r: ℝⁿ → ℝᵐ` is the residual vector function.
11//!
12//! # Algorithm Overview
13//!
14//! The Gauss-Newton method solves the normal equations at each iteration:
15//!
16//! ```text
17//! J^T·J·h = -J^T·r
18//! ```
19//!
20//! where:
21//! - `J` is the Jacobian matrix (m × n) of partial derivatives ∂rᵢ/∂xⱼ
22//! - `r` is the residual vector (m × 1)
23//! - `h` is the step vector (n × 1)
24//!
25//! The approximated Hessian `H ≈ J^T·J` replaces the true Hessian `∇²f = J^T·J + Σᵢ rᵢ·∇²rᵢ`,
26//! which works well when residuals are small or nearly linear.
27//!
28//! ## Convergence Properties
29//!
30//! - **Quadratic convergence** near the solution when the Gauss-Newton approximation is valid
31//! - **May diverge** if the initial guess is far from the optimum or the problem is ill-conditioned
32//! - **No step size control** - always takes the full Newton step without damping
33//!
34//! ## When to Use
35//!
36//! Gauss-Newton is most effective when:
37//! - The problem is well-conditioned with `J^T·J` having good numerical properties
38//! - The initial parameter guess is close to the solution
39//! - Fast convergence is prioritized over robustness
40//! - Residuals at the solution are expected to be small
41//!
42//! For ill-conditioned problems or poor initial guesses, consider:
43//! - [`LevenbergMarquardt`](crate::optimizer::LevenbergMarquardt) for adaptive damping
44//! - [`DogLeg`](crate::optimizer::DogLeg) for trust region control
45//!
46//! # Implementation Features
47//!
48//! - **Sparse matrix support**: Efficient handling of large-scale problems via `faer` sparse library
49//! - **Robust linear solvers**: Choice between Cholesky (fast) and QR (stable) factorizations
50//! - **Jacobi scaling**: Optional diagonal preconditioning to improve conditioning
51//! - **Manifold operations**: Support for optimization on Lie groups (SE2, SE3, SO2, SO3)
52//! - **Comprehensive diagnostics**: Detailed convergence and performance summaries
53//!
54//! # Mathematical Background
55//!
56//! At each iteration k, the algorithm:
57//!
58//! 1. **Linearizes** the problem around current estimate xₖ: `r(xₖ + h) ≈ r(xₖ) + J(xₖ)·h`
59//! 2. **Solves** the normal equations for step h: `J^T·J·h = -J^T·r`
60//! 3. **Updates** parameters: `xₖ₊₁ = xₖ ⊕ h` (using manifold plus operation)
61//! 4. **Checks** convergence criteria (cost, gradient, parameter change)
62//!
63//! The method terminates when cost change, gradient norm, or parameter update fall below
64//! specified tolerances, or when maximum iterations are reached.
65//!
66//! # Examples
67//!
68//! ## Basic usage
69//!
70//! ```no_run
71//! use apex_solver::optimizer::GaussNewton;
72//! use apex_solver::core::problem::Problem;
73//! use apex_solver::JacobianMode;
74//! use std::collections::HashMap;
75//!
76//! # type TestResult = Result<(), Box<dyn std::error::Error>>;
77//! # fn main() -> TestResult {
78//! // Create optimization problem
79//! let mut problem = Problem::new(JacobianMode::Sparse);
80//! // ... add residual blocks (factors) to problem ...
81//!
82//! // Set up initial parameter values
83//! let initial_values = HashMap::new();
84//! // ... initialize parameters ...
85//!
86//! // Create solver with default configuration
87//! let mut solver = GaussNewton::new();
88//!
89//! // Run optimization
90//! let result = solver.optimize(&problem, &initial_values)?;
91//! # Ok(())
92//! # }
93//! ```
94//!
95//! ## Advanced configuration
96//!
97//! ```no_run
98//! use apex_solver::optimizer::gauss_newton::{GaussNewtonConfig, GaussNewton};
99//! use apex_solver::linalg::LinearSolverType;
100//!
101//! # fn main() {
102//! let config = GaussNewtonConfig::new()
103//!     .with_max_iterations(100)
104//!     .with_cost_tolerance(1e-8)
105//!     .with_parameter_tolerance(1e-8)
106//!     .with_gradient_tolerance(1e-10)
107//!     .with_linear_solver_type(LinearSolverType::SparseQR)  // More stable
108//!     .with_jacobi_scaling(true);  // Improve conditioning
109//!
110//! let mut solver = GaussNewton::with_config(config);
111//! # }
112//! ```
113//!
114//! # References
115//!
116//! - Nocedal, J. & Wright, S. (2006). *Numerical Optimization* (2nd ed.). Springer. Chapter 10.
117//! - Madsen, K., Nielsen, H. B., & Tingleff, O. (2004). *Methods for Non-Linear Least Squares Problems* (2nd ed.).
118//! - Björck, Å. (1996). *Numerical Methods for Least Squares Problems*. SIAM.
119
120use crate::error::ErrorLogging;
121use crate::{core::problem, error, linalg, optimizer};
122use apex_manifolds as manifold;
123
124use std::{collections, time};
125use tracing::debug;
126
127use crate::linalg::{
128    DenseCholeskySolver, DenseMode, DenseQRSolver, JacobianMode, LinearSolver, LinearSolverType,
129    SparseCholeskySolver, SparseMode, SparseQRSolver,
130};
131use crate::optimizer::{AssemblyBackend, IterationStats};
132
133/// Configuration parameters for the Gauss-Newton optimizer.
134///
135/// Controls the behavior of the Gauss-Newton algorithm including convergence criteria,
136/// linear solver selection, and numerical stability enhancements.
137///
138/// # Builder Pattern
139///
140/// All configuration options can be set using the builder pattern:
141///
142/// ```
143/// use apex_solver::optimizer::gauss_newton::GaussNewtonConfig;
144/// use apex_solver::linalg::LinearSolverType;
145///
146/// let config = GaussNewtonConfig::new()
147///     .with_max_iterations(50)
148///     .with_cost_tolerance(1e-6)
149///     .with_linear_solver_type(LinearSolverType::SparseQR);
150/// ```
151///
152/// # Convergence Criteria
153///
154/// The optimizer terminates when ANY of the following conditions is met:
155///
156/// - **Cost tolerance**: `|cost_k - cost_{k-1}| < cost_tolerance`
157/// - **Parameter tolerance**: `||step|| < parameter_tolerance`
158/// - **Gradient tolerance**: `||J^T·r|| < gradient_tolerance`
159/// - **Maximum iterations**: `iteration >= max_iterations`
160/// - **Timeout**: `elapsed_time >= timeout`
161///
162/// # See Also
163///
164/// - [`GaussNewton`] - The solver that uses this configuration
165/// - [`LevenbergMarquardtConfig`](crate::optimizer::LevenbergMarquardtConfig) - For adaptive damping
166/// - [`DogLegConfig`](crate::optimizer::DogLegConfig) - For trust region methods
167#[derive(Clone)]
168pub struct GaussNewtonConfig {
169    /// Type of linear solver for the linear systems
170    pub linear_solver_type: linalg::LinearSolverType,
171    /// Maximum number of iterations
172    pub max_iterations: usize,
173    /// Convergence tolerance for cost function
174    pub cost_tolerance: f64,
175    /// Convergence tolerance for parameter updates
176    pub parameter_tolerance: f64,
177    /// Convergence tolerance for gradient norm
178    pub gradient_tolerance: f64,
179    /// Timeout duration
180    pub timeout: Option<time::Duration>,
181    /// Use Jacobi column scaling (preconditioning)
182    ///
183    /// When enabled, normalizes Jacobian columns by their L2 norm before solving.
184    /// This can improve convergence for problems with mixed parameter scales
185    /// (e.g., positions in meters + angles in radians) but adds ~5-10% overhead.
186    ///
187    /// Default: false (Gauss-Newton is typically used on well-conditioned problems)
188    pub use_jacobi_scaling: bool,
189    /// Small regularization to ensure J^T·J is positive definite
190    ///
191    /// Pure Gauss-Newton (λ=0) can fail when J^T·J is singular or near-singular.
192    /// Adding a tiny diagonal regularization (e.g., 1e-10) ensures numerical stability
193    /// while maintaining the fast convergence of Gauss-Newton.
194    ///
195    /// Default: 1e-10 (very small, practically identical to pure Gauss-Newton)
196    pub min_diagonal: f64,
197
198    /// Minimum objective function cutoff (optional early termination)
199    ///
200    /// If set, optimization terminates when cost falls below this threshold.
201    /// Useful for early stopping when a "good enough" solution is acceptable.
202    ///
203    /// Default: None (disabled)
204    pub min_cost_threshold: Option<f64>,
205
206    /// Maximum condition number for Jacobian matrix (optional check)
207    ///
208    /// If set, the optimizer checks if condition_number(J^T*J) exceeds this
209    /// threshold and terminates with IllConditionedJacobian status.
210    /// Note: Computing condition number is expensive, so this is disabled by default.
211    ///
212    /// Default: None (disabled)
213    pub max_condition_number: Option<f64>,
214
215    /// Compute per-variable covariance matrices (uncertainty estimation)
216    ///
217    /// When enabled, computes covariance by inverting the Hessian matrix after
218    /// convergence. The full covariance matrix is extracted into per-variable
219    /// blocks stored in both Variable structs and optimier::SolverResult.
220    ///
221    /// Default: false (to avoid performance overhead)
222    pub compute_covariances: bool,
223
224    /// Enable real-time visualization (graphical debugging).
225    ///
226    /// When enabled, optimization progress is logged to a Rerun viewer.
227    /// **Note:** Requires the `visualization` feature to be enabled in `Cargo.toml`.
228    ///
229    /// Default: false
230    #[cfg(feature = "visualization")]
231    pub enable_visualization: bool,
232}
233
234impl Default for GaussNewtonConfig {
235    fn default() -> Self {
236        Self {
237            linear_solver_type: linalg::LinearSolverType::default(),
238            // Ceres Solver default: 50 (changed from 100 for compatibility)
239            max_iterations: 50,
240            // Ceres Solver default: 1e-6 (changed from 1e-8 for compatibility)
241            cost_tolerance: 1e-6,
242            // Ceres Solver default: 1e-8 (unchanged)
243            parameter_tolerance: 1e-8,
244            // Ceres Solver default: 1e-10 (changed from 1e-8 for compatibility)
245            gradient_tolerance: 1e-10,
246            timeout: None,
247            use_jacobi_scaling: false,
248            min_diagonal: 1e-10,
249            // New Ceres-compatible termination parameters
250            min_cost_threshold: None,
251            max_condition_number: None,
252            compute_covariances: false,
253            #[cfg(feature = "visualization")]
254            enable_visualization: false,
255        }
256    }
257}
258
259impl GaussNewtonConfig {
260    /// Create a new Gauss-Newton configuration with default values.
261    pub fn new() -> Self {
262        Self::default()
263    }
264
265    /// Set the linear solver type
266    pub fn with_linear_solver_type(mut self, linear_solver_type: linalg::LinearSolverType) -> Self {
267        self.linear_solver_type = linear_solver_type;
268        self
269    }
270
271    /// Set the maximum number of iterations
272    pub fn with_max_iterations(mut self, max_iterations: usize) -> Self {
273        self.max_iterations = max_iterations;
274        self
275    }
276
277    /// Set the cost tolerance
278    pub fn with_cost_tolerance(mut self, cost_tolerance: f64) -> Self {
279        self.cost_tolerance = cost_tolerance;
280        self
281    }
282
283    /// Set the parameter tolerance
284    pub fn with_parameter_tolerance(mut self, parameter_tolerance: f64) -> Self {
285        self.parameter_tolerance = parameter_tolerance;
286        self
287    }
288
289    /// Set the gradient tolerance
290    pub fn with_gradient_tolerance(mut self, gradient_tolerance: f64) -> Self {
291        self.gradient_tolerance = gradient_tolerance;
292        self
293    }
294
295    /// Set the timeout duration
296    pub fn with_timeout(mut self, timeout: time::Duration) -> Self {
297        self.timeout = Some(timeout);
298        self
299    }
300
301    /// Enable or disable Jacobi column scaling (preconditioning).
302    ///
303    /// When enabled, normalizes Jacobian columns by their L2 norm before solving.
304    /// Can improve convergence for mixed-scale problems but adds ~5-10% overhead.
305    pub fn with_jacobi_scaling(mut self, use_jacobi_scaling: bool) -> Self {
306        self.use_jacobi_scaling = use_jacobi_scaling;
307        self
308    }
309
310    /// Set the minimum diagonal regularization for numerical stability.
311    ///
312    /// A small value (e.g., 1e-10) ensures J^T·J is positive definite while
313    /// maintaining the fast convergence of pure Gauss-Newton.
314    pub fn with_min_diagonal(mut self, min_diagonal: f64) -> Self {
315        self.min_diagonal = min_diagonal;
316        self
317    }
318
319    /// Set minimum objective function cutoff for early termination.
320    ///
321    /// When set, optimization terminates with MinCostThresholdReached status
322    /// if the cost falls below this threshold. Useful for early stopping when
323    /// a "good enough" solution is acceptable.
324    pub fn with_min_cost_threshold(mut self, min_cost: f64) -> Self {
325        self.min_cost_threshold = Some(min_cost);
326        self
327    }
328
329    /// Set maximum condition number for Jacobian matrix.
330    ///
331    /// If set, the optimizer checks if condition_number(J^T*J) exceeds this
332    /// threshold and terminates with IllConditionedJacobian status.
333    /// Note: Computing condition number is expensive, disabled by default.
334    pub fn with_max_condition_number(mut self, max_cond: f64) -> Self {
335        self.max_condition_number = Some(max_cond);
336        self
337    }
338
339    /// Enable or disable covariance computation (uncertainty estimation).
340    ///
341    /// When enabled, computes the full covariance matrix by inverting the Hessian
342    /// after convergence, then extracts per-variable covariance blocks.
343    pub fn with_compute_covariances(mut self, compute_covariances: bool) -> Self {
344        self.compute_covariances = compute_covariances;
345        self
346    }
347
348    /// Enable real-time visualization.
349    ///
350    /// **Note:** Requires the `visualization` feature to be enabled in `Cargo.toml`.
351    ///
352    /// # Arguments
353    ///
354    /// * `enable` - Whether to enable visualization
355    #[cfg(feature = "visualization")]
356    pub fn with_visualization(mut self, enable: bool) -> Self {
357        self.enable_visualization = enable;
358        self
359    }
360
361    /// Print configuration parameters (info level logging)
362    pub fn print_configuration(&self) {
363        debug!(
364            "\nConfiguration:\n  Solver:        Gauss-Newton\n  Linear solver: {:?}\n  Convergence Criteria:\n  Max iterations:      {}\n  Cost tolerance:      {:.2e}\n  Parameter tolerance: {:.2e}\n  Gradient tolerance:  {:.2e}\n  Timeout:             {:?}\n  Numerical Settings:\n  Jacobi scaling:      {}\n  Compute covariances: {}",
365            self.linear_solver_type,
366            self.max_iterations,
367            self.cost_tolerance,
368            self.parameter_tolerance,
369            self.gradient_tolerance,
370            self.timeout,
371            if self.use_jacobi_scaling {
372                "enabled"
373            } else {
374                "disabled"
375            },
376            if self.compute_covariances {
377                "enabled"
378            } else {
379                "disabled"
380            }
381        );
382    }
383}
384
385/// Result from step computation
386struct StepResult {
387    step: faer::Mat<f64>,
388    gradient_norm: f64,
389}
390
391/// Result from cost evaluation
392struct CostEvaluation {
393    new_cost: f64,
394    cost_reduction: f64,
395}
396
397/// Gauss-Newton solver for nonlinear least squares optimization.
398///
399/// Implements the classical Gauss-Newton algorithm which solves `J^T·J·h = -J^T·r` at each
400/// iteration to find the step `h`. This provides fast quadratic convergence near the solution
401/// but may diverge for poor initial guesses or ill-conditioned problems.
402///
403/// # Algorithm
404///
405/// At each iteration k:
406/// 1. Compute residual `r(xₖ)` and Jacobian `J(xₖ)`
407/// 2. Form normal equations: `(J^T·J)·h = -J^T·r`
408/// 3. Solve for step `h` using Cholesky or QR factorization
409/// 4. Update parameters: `xₖ₊₁ = xₖ ⊕ h` (manifold plus operation)
410/// 5. Check convergence criteria
411///
412/// # Examples
413///
414/// ```no_run
415/// use apex_solver::optimizer::GaussNewton;
416/// use apex_solver::core::problem::Problem;
417/// use apex_solver::JacobianMode;
418/// use std::collections::HashMap;
419///
420/// # type TestResult = Result<(), Box<dyn std::error::Error>>;
421/// # fn main() -> TestResult {
422/// let mut problem = Problem::new(JacobianMode::Sparse);
423/// // ... add factors to problem ...
424///
425/// let initial_values = HashMap::new();
426/// // ... initialize parameters ...
427///
428/// let mut solver = GaussNewton::new();
429/// let result = solver.optimize(&problem, &initial_values)?;
430/// # Ok(())
431/// # }
432/// ```
433///
434/// # See Also
435///
436/// - [`GaussNewtonConfig`] - Configuration options
437/// - [`LevenbergMarquardt`](crate::optimizer::LevenbergMarquardt) - For adaptive damping
438/// - [`DogLeg`](crate::optimizer::DogLeg) - For trust region control
439pub struct GaussNewton {
440    config: GaussNewtonConfig,
441    jacobi_scaling: Option<Vec<f64>>,
442    observers: optimizer::OptObserverVec,
443}
444
445impl Default for GaussNewton {
446    fn default() -> Self {
447        Self::new()
448    }
449}
450
451impl GaussNewton {
452    /// Create a new Gauss-Newton solver with default configuration.
453    pub fn new() -> Self {
454        Self::with_config(GaussNewtonConfig::default())
455    }
456
457    /// Create a new Gauss-Newton solver with the given configuration.
458    pub fn with_config(config: GaussNewtonConfig) -> Self {
459        Self {
460            config,
461            jacobi_scaling: None,
462            observers: optimizer::OptObserverVec::new(),
463        }
464    }
465
466    /// Add an observer to the solver.
467    ///
468    /// Observers are notified at each iteration with the current variable values.
469    /// This enables real-time visualization, logging, metrics collection, etc.
470    ///
471    /// # Examples
472    ///
473    /// ```no_run
474    /// use apex_solver::optimizer::GaussNewton;
475    /// # use apex_solver::optimizer::OptObserver;
476    /// # use std::collections::HashMap;
477    /// # use apex_solver::core::problem::VariableEnum;
478    ///
479    /// # struct MyObserver;
480    /// # impl OptObserver for MyObserver {
481    /// #     fn on_step(&self, _: &HashMap<String, VariableEnum>, _: usize) {}
482    /// # }
483    /// let mut solver = GaussNewton::new();
484    /// solver.add_observer(MyObserver);
485    /// ```
486    pub fn add_observer(&mut self, observer: impl optimizer::OptObserver + 'static) {
487        self.observers.add(observer);
488    }
489
490    /// Compute Gauss-Newton step by solving the normal equations (generic over assembly mode).
491    fn compute_step_generic<M: AssemblyBackend>(
492        &self,
493        residuals: &faer::Mat<f64>,
494        scaled_jacobian: &M::Jacobian,
495        linear_solver: &mut dyn LinearSolver<M>,
496    ) -> Result<StepResult, optimizer::OptimizerError> {
497        // Solve the Gauss-Newton equation: J^T·J·Δx = -J^T·r
498        let residuals_owned = residuals.as_ref().to_owned();
499        let scaled_step = linear_solver
500            .solve_normal_equation(&residuals_owned, scaled_jacobian)
501            .map_err(|e| {
502                optimizer::OptimizerError::LinearSolveFailed(e.to_string()).log_with_source(e)
503            })?;
504
505        // Get gradient from the solver (J^T * r)
506        let gradient = linear_solver.get_gradient().ok_or_else(|| {
507            optimizer::OptimizerError::NumericalInstability("Gradient not available".into()).log()
508        })?;
509        let gradient_norm = gradient.norm_l2();
510
511        // Apply inverse Jacobi scaling to get final step (if enabled)
512        let step = if self.config.use_jacobi_scaling {
513            let scaling = self
514                .jacobi_scaling
515                .as_ref()
516                .ok_or_else(|| optimizer::OptimizerError::JacobiScalingNotInitialized.log())?;
517            M::apply_inverse_scaling(&scaled_step, scaling)
518        } else {
519            scaled_step
520        };
521
522        Ok(StepResult {
523            step,
524            gradient_norm,
525        })
526    }
527
528    /// Apply step to parameters and evaluate new cost
529    fn apply_step_and_evaluate_cost(
530        &self,
531        step_result: &StepResult,
532        state: &mut optimizer::InitializedState,
533        problem: &problem::Problem,
534    ) -> error::ApexSolverResult<CostEvaluation> {
535        // Apply parameter updates using manifold operations
536        let _step_norm = optimizer::apply_parameter_step(
537            &mut state.variables,
538            step_result.step.as_ref(),
539            &state.sorted_vars,
540        );
541
542        // Compute new cost (residual only, no Jacobian needed for step evaluation)
543        let new_residual = problem.compute_residual_sparse(&state.variables)?;
544        let new_cost = optimizer::compute_cost(&new_residual);
545
546        // Compute cost reduction
547        let cost_reduction = state.current_cost - new_cost;
548
549        // Update current cost
550        state.current_cost = new_cost;
551
552        Ok(CostEvaluation {
553            new_cost,
554            cost_reduction,
555        })
556    }
557
558    /// Run optimization using the specified assembly mode and linear solver.
559    fn optimize_with_mode<M: AssemblyBackend>(
560        &mut self,
561        problem: &problem::Problem,
562        initial_params: &collections::HashMap<
563            String,
564            (manifold::ManifoldType, nalgebra::DVector<f64>),
565        >,
566        linear_solver: &mut dyn LinearSolver<M>,
567    ) -> Result<
568        optimizer::SolverResult<collections::HashMap<String, problem::VariableEnum>>,
569        error::ApexSolverError,
570    > {
571        let start_time = time::Instant::now();
572        let mut iteration = 0;
573        let mut cost_evaluations = 1; // Initial cost evaluation
574        let mut jacobian_evaluations = 0;
575
576        // Initialize optimization state
577        let mut state = optimizer::initialize_optimization_state(problem, initial_params)?;
578
579        // Initialize summary tracking variables
580        let mut max_gradient_norm: f64 = 0.0;
581        let mut max_parameter_update_norm: f64 = 0.0;
582        let mut total_cost_reduction = 0.0;
583        let mut final_gradient_norm;
584        let mut final_parameter_update_norm;
585
586        // Initialize iteration statistics tracking
587        let mut iteration_stats = Vec::with_capacity(self.config.max_iterations);
588        let mut previous_cost = state.current_cost;
589
590        // Print configuration and header if debug level is enabled
591        if tracing::enabled!(tracing::Level::DEBUG) {
592            self.config.print_configuration();
593            IterationStats::print_header();
594        }
595
596        // Main optimization loop
597        loop {
598            let iter_start = time::Instant::now();
599
600            // Evaluate residuals and Jacobian using the assembly mode
601            let (residuals, jacobian) = M::assemble(
602                problem,
603                &state.variables,
604                &state.variable_index_map,
605                state.symbolic_structure.as_ref(),
606                state.total_dof,
607            )?;
608            jacobian_evaluations += 1;
609
610            // Process Jacobian (apply scaling if enabled)
611            let scaled_jacobian = if self.config.use_jacobi_scaling {
612                optimizer::process_jacobian_generic::<M>(
613                    &jacobian,
614                    &mut self.jacobi_scaling,
615                    iteration,
616                )?
617            } else {
618                jacobian
619            };
620
621            // Compute Gauss-Newton step
622            let step_result =
623                self.compute_step_generic::<M>(&residuals, &scaled_jacobian, linear_solver)?;
624
625            // Update tracking variables
626            max_gradient_norm = max_gradient_norm.max(step_result.gradient_norm);
627            final_gradient_norm = step_result.gradient_norm;
628            let step_norm = step_result.step.norm_l2();
629            max_parameter_update_norm = max_parameter_update_norm.max(step_norm);
630            final_parameter_update_norm = step_norm;
631
632            // Capture cost before applying step (for convergence check)
633            let cost_before_step = state.current_cost;
634
635            // Apply step and evaluate new cost
636            let cost_eval = self.apply_step_and_evaluate_cost(&step_result, &mut state, problem)?;
637            cost_evaluations += 1;
638            total_cost_reduction += cost_eval.cost_reduction;
639
640            // OPTIMIZATION: Only collect iteration statistics if debug level is enabled
641            if tracing::enabled!(tracing::Level::DEBUG) {
642                let iter_elapsed_ms = iter_start.elapsed().as_secs_f64() * 1000.0;
643                let total_elapsed_ms = start_time.elapsed().as_secs_f64() * 1000.0;
644
645                let stats = IterationStats {
646                    iteration,
647                    cost: state.current_cost,
648                    cost_change: previous_cost - state.current_cost,
649                    gradient_norm: step_result.gradient_norm,
650                    step_norm,
651                    tr_ratio: 0.0,  // Not used in Gauss-Newton
652                    tr_radius: 0.0, // Not used in Gauss-Newton
653                    ls_iter: 0,     // Direct solver (Cholesky) has no iterations
654                    iter_time_ms: iter_elapsed_ms,
655                    total_time_ms: total_elapsed_ms,
656                    accepted: true, // Gauss-Newton always accepts steps
657                };
658
659                iteration_stats.push(stats.clone());
660                stats.print_line();
661            }
662
663            previous_cost = state.current_cost;
664
665            // Notify all observers with current state
666            optimizer::notify_observers_generic::<M>(
667                &mut self.observers,
668                &state.variables,
669                iteration,
670                state.current_cost,
671                step_result.gradient_norm,
672                None, // Gauss-Newton doesn't use damping
673                step_norm,
674                None, // Gauss-Newton doesn't use step quality
675                linear_solver,
676            );
677
678            // Compute parameter norm for convergence check
679            let parameter_norm = optimizer::compute_parameter_norm(&state.variables);
680
681            // Check convergence using comprehensive termination criteria
682            let elapsed = start_time.elapsed();
683            if let Some(status) = optimizer::check_convergence(&optimizer::ConvergenceParams {
684                iteration,
685                current_cost: cost_before_step,
686                new_cost: cost_eval.new_cost,
687                parameter_norm,
688                parameter_update_norm: step_norm,
689                gradient_norm: step_result.gradient_norm,
690                elapsed,
691                step_accepted: true, // GN always accepts
692                max_iterations: self.config.max_iterations,
693                gradient_tolerance: self.config.gradient_tolerance,
694                parameter_tolerance: self.config.parameter_tolerance,
695                cost_tolerance: self.config.cost_tolerance,
696                min_cost_threshold: self.config.min_cost_threshold,
697                timeout: self.config.timeout,
698                trust_region_radius: None,
699                min_trust_region_radius: None,
700            }) {
701                // Print summary only if debug level is enabled
702                if tracing::enabled!(tracing::Level::DEBUG) {
703                    let summary = optimizer::create_optimizer_summary(
704                        "Gauss-Newton",
705                        state.initial_cost,
706                        state.current_cost,
707                        iteration + 1,
708                        None,
709                        None,
710                        max_gradient_norm,
711                        final_gradient_norm,
712                        max_parameter_update_norm,
713                        final_parameter_update_norm,
714                        total_cost_reduction,
715                        elapsed,
716                        iteration_stats.clone(),
717                        status.clone(),
718                        None,
719                        None,
720                        None,
721                    );
722                    debug!("{}", summary);
723                }
724
725                // Compute covariances if enabled
726                let covariances = if self.config.compute_covariances {
727                    problem.compute_and_set_covariances_generic::<M>(
728                        linear_solver,
729                        &mut state.variables,
730                        &state.variable_index_map,
731                    )
732                } else {
733                    None
734                };
735
736                return Ok(optimizer::build_solver_result(
737                    status,
738                    iteration + 1,
739                    state,
740                    elapsed,
741                    final_gradient_norm,
742                    final_parameter_update_norm,
743                    cost_evaluations,
744                    jacobian_evaluations,
745                    covariances,
746                ));
747            }
748
749            iteration += 1;
750        }
751    }
752
753    /// Run optimization, automatically selecting sparse or dense path based on config.
754    pub fn optimize(
755        &mut self,
756        problem: &problem::Problem,
757        initial_params: &collections::HashMap<
758            String,
759            (manifold::ManifoldType, nalgebra::DVector<f64>),
760        >,
761    ) -> Result<
762        optimizer::SolverResult<collections::HashMap<String, problem::VariableEnum>>,
763        error::ApexSolverError,
764    > {
765        match problem.jacobian_mode {
766            JacobianMode::Dense => match self.config.linear_solver_type {
767                LinearSolverType::DenseQR => {
768                    let mut solver = DenseQRSolver::new();
769                    self.optimize_with_mode::<DenseMode>(problem, initial_params, &mut solver)
770                }
771                _ => {
772                    let mut solver = DenseCholeskySolver::new();
773                    self.optimize_with_mode::<DenseMode>(problem, initial_params, &mut solver)
774                }
775            },
776            JacobianMode::Sparse => match self.config.linear_solver_type {
777                linalg::LinearSolverType::SparseQR => {
778                    let mut solver = SparseQRSolver::new();
779                    self.optimize_with_mode::<SparseMode>(problem, initial_params, &mut solver)
780                }
781                _ => {
782                    // SparseCholesky (default), SparseSchurComplement or DenseCholesky with
783                    // sparse mode → SparseCholeskySolver
784                    let mut solver = SparseCholeskySolver::new();
785                    self.optimize_with_mode::<SparseMode>(problem, initial_params, &mut solver)
786                }
787            },
788        }
789    }
790}
791
792impl optimizer::Optimizer for GaussNewton {
793    fn optimize(
794        &mut self,
795        problem: &problem::Problem,
796        initial_params: &std::collections::HashMap<
797            String,
798            (manifold::ManifoldType, nalgebra::DVector<f64>),
799        >,
800    ) -> Result<
801        optimizer::SolverResult<std::collections::HashMap<String, problem::VariableEnum>>,
802        crate::error::ApexSolverError,
803    > {
804        self.optimize(problem, initial_params)
805    }
806}
807
808#[cfg(test)]
809mod tests {
810    use crate::{core::problem, factors, linalg::JacobianMode, optimizer};
811    use apex_manifolds as manifold;
812    use nalgebra::dvector;
813    use std::collections;
814
815    type TestResult = Result<(), Box<dyn std::error::Error>>;
816
817    /// Custom Rosenbrock Factor 1: r1 = 10(x2 - x1²)
818    /// Demonstrates extensibility - custom factors can be defined outside of factors.rs
819    #[derive(Debug, Clone)]
820    struct RosenbrockFactor1;
821
822    impl factors::Factor for RosenbrockFactor1 {
823        fn linearize(
824            &self,
825            params: &[nalgebra::DVector<f64>],
826            compute_jacobian: bool,
827        ) -> (nalgebra::DVector<f64>, Option<nalgebra::DMatrix<f64>>) {
828            let x1 = params[0][0];
829            let x2 = params[1][0];
830
831            // Residual: r1 = 10(x2 - x1²)
832            let residual = nalgebra::dvector![10.0 * (x2 - x1 * x1)];
833
834            // Jacobian: ∂r1/∂x1 = -20*x1, ∂r1/∂x2 = 10
835            let jacobian = if compute_jacobian {
836                let mut jac = nalgebra::DMatrix::zeros(1, 2);
837                jac[(0, 0)] = -20.0 * x1;
838                jac[(0, 1)] = 10.0;
839                Some(jac)
840            } else {
841                None
842            };
843
844            (residual, jacobian)
845        }
846
847        fn get_dimension(&self) -> usize {
848            1
849        }
850    }
851
852    /// Custom Rosenbrock Factor 2: r2 = 1 - x1
853    /// Demonstrates extensibility - custom factors can be defined outside of factors.rs
854    #[derive(Debug, Clone)]
855    struct RosenbrockFactor2;
856
857    impl factors::Factor for RosenbrockFactor2 {
858        fn linearize(
859            &self,
860            params: &[nalgebra::DVector<f64>],
861            compute_jacobian: bool,
862        ) -> (nalgebra::DVector<f64>, Option<nalgebra::DMatrix<f64>>) {
863            let x1 = params[0][0];
864
865            // Residual: r2 = 1 - x1
866            let residual = nalgebra::dvector![1.0 - x1];
867
868            // Jacobian: ∂r2/∂x1 = -1
869            let jacobian = if compute_jacobian {
870                Some(nalgebra::DMatrix::from_element(1, 1, -1.0))
871            } else {
872                None
873            };
874
875            (residual, jacobian)
876        }
877
878        fn get_dimension(&self) -> usize {
879            1
880        }
881    }
882
883    #[test]
884    fn test_rosenbrock_optimization() -> TestResult {
885        // Rosenbrock function test:
886        // Minimize: r1² + r2² where
887        //   r1 = 10(x2 - x1²)
888        //   r2 = 1 - x1
889        // Starting point: [-1.2, 1.0]
890        // Expected minimum: [1.0, 1.0]
891
892        let mut problem = problem::Problem::new(JacobianMode::Sparse);
893        let mut initial_values = collections::HashMap::new();
894
895        // Add variables using Rn manifold (Euclidean space)
896        initial_values.insert(
897            "x1".to_string(),
898            (manifold::ManifoldType::RN, dvector![-1.2]),
899        );
900        initial_values.insert(
901            "x2".to_string(),
902            (manifold::ManifoldType::RN, dvector![1.0]),
903        );
904
905        // Add custom factors (demonstrates extensibility!)
906        problem.add_residual_block(&["x1", "x2"], Box::new(RosenbrockFactor1), None);
907        problem.add_residual_block(&["x1"], Box::new(RosenbrockFactor2), None);
908
909        // Configure Gauss-Newton optimizer
910        let config = optimizer::gauss_newton::GaussNewtonConfig::new()
911            .with_max_iterations(100)
912            .with_cost_tolerance(1e-8)
913            .with_parameter_tolerance(1e-8)
914            .with_gradient_tolerance(1e-10);
915
916        let mut solver = optimizer::GaussNewton::with_config(config);
917        let result = solver.optimize(&problem, &initial_values)?;
918
919        // Extract final values
920        let x1_final = result
921            .parameters
922            .get("x1")
923            .ok_or("x1 not found")?
924            .to_vector()[0];
925        let x2_final = result
926            .parameters
927            .get("x2")
928            .ok_or("x2 not found")?
929            .to_vector()[0];
930
931        // Verify convergence to [1.0, 1.0]
932        assert!(
933            matches!(
934                result.status,
935                optimizer::OptimizationStatus::Converged
936                    | optimizer::OptimizationStatus::CostToleranceReached
937                    | optimizer::OptimizationStatus::ParameterToleranceReached
938                    | optimizer::OptimizationStatus::GradientToleranceReached
939            ),
940            "Optimization should converge"
941        );
942        assert!(
943            (x1_final - 1.0).abs() < 1e-4,
944            "x1 should converge to 1.0, got {}",
945            x1_final
946        );
947        assert!(
948            (x2_final - 1.0).abs() < 1e-4,
949            "x2 should converge to 1.0, got {}",
950            x2_final
951        );
952        assert!(
953            result.final_cost < 1e-6,
954            "Final cost should be near zero, got {}",
955            result.final_cost
956        );
957        Ok(())
958    }
959
960    /// Trivial factor: r = x - target, J = [[1.0]]
961    struct LinearFactor {
962        target: f64,
963    }
964
965    impl factors::Factor for LinearFactor {
966        fn linearize(
967            &self,
968            params: &[nalgebra::DVector<f64>],
969            compute_jacobian: bool,
970        ) -> (nalgebra::DVector<f64>, Option<nalgebra::DMatrix<f64>>) {
971            let residual = nalgebra::dvector![params[0][0] - self.target];
972            let jacobian = if compute_jacobian {
973                Some(nalgebra::DMatrix::from_element(1, 1, 1.0))
974            } else {
975                None
976            };
977            (residual, jacobian)
978        }
979
980        fn get_dimension(&self) -> usize {
981            1
982        }
983    }
984
985    fn rosenbrock_problem() -> (
986        problem::Problem,
987        collections::HashMap<String, (manifold::ManifoldType, nalgebra::DVector<f64>)>,
988    ) {
989        let mut prob = problem::Problem::new(JacobianMode::Sparse);
990        let mut init = collections::HashMap::new();
991        init.insert(
992            "x1".to_string(),
993            (manifold::ManifoldType::RN, nalgebra::dvector![-1.2]),
994        );
995        init.insert(
996            "x2".to_string(),
997            (manifold::ManifoldType::RN, nalgebra::dvector![1.0]),
998        );
999        prob.add_residual_block(&["x1", "x2"], Box::new(RosenbrockFactor1), None);
1000        prob.add_residual_block(&["x1"], Box::new(RosenbrockFactor2), None);
1001        (prob, init)
1002    }
1003
1004    fn linear_problem(
1005        start: f64,
1006    ) -> (
1007        problem::Problem,
1008        collections::HashMap<String, (manifold::ManifoldType, nalgebra::DVector<f64>)>,
1009    ) {
1010        let mut prob = problem::Problem::new(JacobianMode::Sparse);
1011        let mut init = collections::HashMap::new();
1012        init.insert(
1013            "x".to_string(),
1014            (manifold::ManifoldType::RN, nalgebra::dvector![start]),
1015        );
1016        prob.add_residual_block(&["x"], Box::new(LinearFactor { target: 0.0 }), None);
1017        (prob, init)
1018    }
1019
1020    // -------------------------------------------------------------------------
1021    // GaussNewtonConfig builder tests
1022    // -------------------------------------------------------------------------
1023
1024    #[test]
1025    fn test_gn_config_default() {
1026        let cfg = optimizer::gauss_newton::GaussNewtonConfig::default();
1027        assert_eq!(cfg.max_iterations, 50);
1028        assert!((cfg.cost_tolerance - 1e-6).abs() < 1e-15);
1029        assert!(!cfg.use_jacobi_scaling);
1030        assert!(!cfg.compute_covariances);
1031    }
1032
1033    #[test]
1034    fn test_gn_config_builders() {
1035        use crate::linalg::LinearSolverType;
1036        let cfg = optimizer::gauss_newton::GaussNewtonConfig::new()
1037            .with_max_iterations(15)
1038            .with_cost_tolerance(1e-5)
1039            .with_parameter_tolerance(1e-6)
1040            .with_gradient_tolerance(1e-7)
1041            .with_jacobi_scaling(true)
1042            .with_min_diagonal(1e-8)
1043            .with_min_cost_threshold(1e-10)
1044            .with_compute_covariances(true)
1045            .with_linear_solver_type(LinearSolverType::SparseQR);
1046        assert_eq!(cfg.max_iterations, 15);
1047        assert!((cfg.cost_tolerance - 1e-5).abs() < 1e-20);
1048        assert!(cfg.use_jacobi_scaling);
1049        assert!(cfg.min_cost_threshold.is_some());
1050        assert!(cfg.compute_covariances);
1051        assert!(matches!(cfg.linear_solver_type, LinearSolverType::SparseQR));
1052    }
1053
1054    #[test]
1055    fn test_gn_print_configuration_no_panic() {
1056        optimizer::gauss_newton::GaussNewtonConfig::default().print_configuration();
1057    }
1058
1059    #[test]
1060    fn test_gn_default_equals_new() {
1061        let _a = optimizer::GaussNewton::new();
1062        let _b = optimizer::GaussNewton::default();
1063    }
1064
1065    #[test]
1066    fn test_gn_with_config_method() {
1067        let cfg = optimizer::gauss_newton::GaussNewtonConfig::new().with_max_iterations(3);
1068        let _solver = optimizer::GaussNewton::with_config(cfg);
1069    }
1070
1071    // -------------------------------------------------------------------------
1072    // Convergence termination paths
1073    // -------------------------------------------------------------------------
1074
1075    #[test]
1076    fn test_gn_max_iterations_termination() -> TestResult {
1077        let (problem, initial_values) = rosenbrock_problem();
1078        let cfg = optimizer::gauss_newton::GaussNewtonConfig::new().with_max_iterations(2);
1079        let mut solver = optimizer::GaussNewton::with_config(cfg);
1080        let result = solver.optimize(&problem, &initial_values)?;
1081        assert_eq!(
1082            result.status,
1083            optimizer::OptimizationStatus::MaxIterationsReached
1084        );
1085        Ok(())
1086    }
1087
1088    #[test]
1089    fn test_gn_gradient_tolerance_convergence() -> TestResult {
1090        let (problem, initial_values) = linear_problem(1.0);
1091        let cfg = optimizer::gauss_newton::GaussNewtonConfig::new()
1092            .with_gradient_tolerance(1e3)
1093            .with_cost_tolerance(1e-20)
1094            .with_parameter_tolerance(1e-20);
1095        let mut solver = optimizer::GaussNewton::with_config(cfg);
1096        let result = solver.optimize(&problem, &initial_values)?;
1097        assert_eq!(
1098            result.status,
1099            optimizer::OptimizationStatus::GradientToleranceReached
1100        );
1101        Ok(())
1102    }
1103
1104    #[test]
1105    fn test_gn_cost_tolerance_convergence() -> TestResult {
1106        let (problem, initial_values) = rosenbrock_problem();
1107        let cfg = optimizer::gauss_newton::GaussNewtonConfig::new()
1108            .with_cost_tolerance(1e2) // very loose
1109            .with_gradient_tolerance(1e-20)
1110            .with_parameter_tolerance(1e-20);
1111        let mut solver = optimizer::GaussNewton::with_config(cfg);
1112        let result = solver.optimize(&problem, &initial_values)?;
1113        assert!(matches!(
1114            result.status,
1115            optimizer::OptimizationStatus::CostToleranceReached
1116                | optimizer::OptimizationStatus::GradientToleranceReached
1117                | optimizer::OptimizationStatus::ParameterToleranceReached
1118                | optimizer::OptimizationStatus::Converged
1119        ));
1120        Ok(())
1121    }
1122
1123    #[test]
1124    fn test_gn_qr_solver() -> TestResult {
1125        use crate::linalg::LinearSolverType;
1126        let (problem, initial_values) = rosenbrock_problem();
1127        let cfg = optimizer::gauss_newton::GaussNewtonConfig::new()
1128            .with_linear_solver_type(LinearSolverType::SparseQR)
1129            .with_max_iterations(100);
1130        let mut solver = optimizer::GaussNewton::with_config(cfg);
1131        let result = solver.optimize(&problem, &initial_values)?;
1132        assert!(result.final_cost < 1e-6);
1133        Ok(())
1134    }
1135
1136    #[test]
1137    fn test_gn_jacobi_scaling_enabled() -> TestResult {
1138        let (problem, initial_values) = rosenbrock_problem();
1139        let cfg = optimizer::gauss_newton::GaussNewtonConfig::new()
1140            .with_jacobi_scaling(true)
1141            .with_max_iterations(100);
1142        let mut solver = optimizer::GaussNewton::with_config(cfg);
1143        let result = solver.optimize(&problem, &initial_values)?;
1144        assert!(result.final_cost < 1e-6);
1145        Ok(())
1146    }
1147
1148    #[test]
1149    fn test_gn_min_cost_threshold() -> TestResult {
1150        let (problem, initial_values) = rosenbrock_problem();
1151        let cfg = optimizer::gauss_newton::GaussNewtonConfig::new()
1152            .with_min_cost_threshold(1e10)
1153            .with_cost_tolerance(1e-20)
1154            .with_gradient_tolerance(1e-20)
1155            .with_parameter_tolerance(1e-20);
1156        let mut solver = optimizer::GaussNewton::with_config(cfg);
1157        let result = solver.optimize(&problem, &initial_values)?;
1158        assert_eq!(
1159            result.status,
1160            optimizer::OptimizationStatus::MinCostThresholdReached
1161        );
1162        Ok(())
1163    }
1164
1165    #[test]
1166    fn test_gn_result_fields() -> TestResult {
1167        let (problem, initial_values) = rosenbrock_problem();
1168        let mut solver = optimizer::GaussNewton::new();
1169        let result = solver.optimize(&problem, &initial_values)?;
1170        assert!(result.initial_cost > result.final_cost);
1171        assert!(result.iterations > 0);
1172        Ok(())
1173    }
1174
1175    #[test]
1176    fn test_gn_convergence_info_populated() -> TestResult {
1177        let (problem, initial_values) = rosenbrock_problem();
1178        let mut solver = optimizer::GaussNewton::new();
1179        let result = solver.optimize(&problem, &initial_values)?;
1180        assert!(result.convergence_info.is_some());
1181        Ok(())
1182    }
1183
1184    #[test]
1185    fn test_gn_timeout_config() {
1186        let cfg = optimizer::gauss_newton::GaussNewtonConfig::new()
1187            .with_timeout(std::time::Duration::from_secs(60));
1188        assert!(cfg.timeout.is_some());
1189    }
1190}