// qudit_inst/numerical/minimizers/lm.rs
1#![allow(non_snake_case)] // A lot of math in here, okay to be flexible with var names.
2
3use crate::numerical::Function;
4use crate::numerical::Jacobian;
5use crate::numerical::MinimizationAlgorithm;
6use crate::numerical::MinimizationResult;
7use crate::numerical::ProvidesJacobian;
8use crate::numerical::ResidualFunction;
9use faer::linalg::solvers::Solve;
10use faer::{Accum, Col, Mat, Par, linalg::matmul::matmul};
11use qudit_core::RealScalar;
12
/// Levenberg-Marquardt (LM) optimization algorithm.
///
/// This struct implements the Levenberg-Marquardt algorithm for non-linear
/// least squares problems. It combines the Gauss-Newton algorithm with
/// gradient descent.
#[derive(Clone)]
pub struct LM<R: RealScalar> {
    /// Maximum number of iterations.
    pub max_iterations: usize,
    /// Tolerance for convergence (change in parameters).
    pub tolerance: R,
    /// Initial damping parameter.
    pub initial_lambda: Option<R>,
    /// Initial lambda scaling factor.
    pub tau: R,
    /// Multiplier for lambda when a step is rejected.
    pub lambda_increase_factor: R,
    /// Divisor for lambda when a step is accepted.
    pub lambda_decrease_factor: R,
    /// Lower bound enforced on lambda after a successful step, so damping
    /// never vanishes entirely.
    pub minimum_lambda: R,
    /// Maximum allowed lambda value - if exceeded, optimization fails
    pub maximum_lambda: R,
    /// Minimum gradient norm - if gradient becomes smaller, optimization fails
    pub minimum_gradient_norm: R,
    /// Absolute tolerance for cost difference - terminate if cost improvement is too small
    pub diff_tol_a: R,
    /// Relative tolerance for cost difference - terminate if cost improvement is too small relative to current cost
    pub diff_tol_r: R,
}
43
impl<R: RealScalar> Default for LM<R> {
    /// Conservative defaults suitable for well-scaled problems.
    fn default() -> Self {
        Self {
            max_iterations: 75,
            tolerance: R::from64(1e-8),
            // Derive the initial damping from the problem (see `minimize`).
            initial_lambda: None,
            // Classic Madsen/Nielsen initial scaling factor.
            tau: R::from64(1e-3),
            lambda_increase_factor: R::from64(2.0),
            lambda_decrease_factor: R::from64(3.0),
            // Floors/ceilings tied to machine precision keep damping finite.
            minimum_lambda: R::from64(10.0) * R::epsilon(),
            maximum_lambda: R::from64(1e12),
            minimum_gradient_norm: R::from64(10.0) * R::epsilon(),
            diff_tol_a: R::from64(1e-10),
            diff_tol_r: R::from64(1e-6),
        }
    }
}
61
62impl<R, P> MinimizationAlgorithm<R, P> for LM<R>
63where
64    R: RealScalar,
65    P: ProvidesJacobian<R>,
66{
67    type Func = P::Jacobian;
68
69    fn initialize(&self, problem: &P) -> P::Jacobian {
70        problem.build_jacobian()
71    }
72
73    /// Reference: <https://arxiv.org/pdf/1201.5885>
74    /// Reference: <https://scispace.com/pdf/the-levenberg-marquardt-algorithm-implementation-and-theory-u1ziue6l3q.pdf>
75    fn minimize(&self, objective: &mut P::Jacobian, x0: &[R]) -> MinimizationResult<R> {
76        let mut x = Col::from_fn(x0.len(), |i| x0[i]);
77        let mut Rbuf = objective.allocate_residual();
78        let mut Jbuf = objective.allocate_jacobian();
79        let mut JtJ: Mat<R> = Mat::zeros(objective.num_params(), objective.num_params());
80        let mut Jtr: Mat<R> = Mat::zeros(objective.num_params(), 1);
81
82        // /////// FINITE ANALYSIS TEST
83        // unsafe {
84        //     // Calculate new residuals and jacobian
85        //     let param_slice = std::slice::from_raw_parts(x.as_ptr(), x.nrows());
86        //     objective.residuals_and_jacobian_into(param_slice, Rbuf.as_mut(), Jbuf.as_mut());
87        // }
88
89        // // Finite difference test for Jacobian correctness
90        // let epsilon = R::from64(1e-6); // Small perturbation
91        // let mut Rbuf_perturbed = objective.allocate_residual();
92
93        // println!("\n--- Finite Difference Jacobian Test ---");
94        // for j in 0..n_params {
95        //     let mut x_perturbed = x.clone(); // Copy x
96        //     // Perturb the j-th parameter
97        //     x_perturbed[j] += epsilon;
98
99        //     // Calculate residuals at x_perturbed
100        //     unsafe {
101        //         let param_slice = std::slice::from_raw_parts(x_perturbed.as_ptr(), x_perturbed.nrows());
102        //         objective.residuals_into(param_slice, Rbuf_perturbed.as_mut());
103        //     }
104
105        //     // Calculate finite difference approximation for the j-th column of the Jacobian
106        //     // J_fd_j = (R(x + e_j * epsilon) - R(x)) / epsilon
107        //     let fd_jacobian_col_j = (Rbuf_perturbed.as_mat() - Rbuf.as_mat()) * Scale(R::one() / epsilon);
108
109        //     // Get the analytical j-th column of the Jacobian
110        //     let analytical_jacobian_col_j = Jbuf.col(j);
111
112        //     // Compare: calculate the L2 norm of the difference
113        //     let diff_norm = (fd_jacobian_col_j - analytical_jacobian_col_j.as_mat()).norm_l2();
114        //     let analytical_norm = analytical_jacobian_col_j.norm_l2();
115
116        //     println!(
117        //         "Parameter {}: Diff Norm = {}, Analytical Norm = {}",
118        //         j, diff_norm, analytical_norm
119        //     );
120
121        //     // Optional: Add a threshold for warning
122        //     if analytical_norm > R::from64(1e-10) && diff_norm / analytical_norm > R::from64(1e-4) {
123        //         // A relative error of 0.01% might be acceptable, adjust as needed
124        //         println!(
125        //             "  WARNING: Large relative discrepancy. Relative error: {}",
126        //             diff_norm / analytical_norm
127        //         );
128        //     } else if analytical_norm <= R::from64(1e-10) && diff_norm > R::from64(1e-8) {
129        //         // If analytical norm is very small, check absolute difference
130        //         println!(
131        //             "  WARNING: Large absolute discrepancy (analytical norm near zero). Absolute error: {}",
132        //             diff_norm
133        //         );
134        //     }
135        // }
136        // println!("--- End Finite Difference Jacobian Test ---\n");
137        // ///////
138
139        // Safety: x is non null, initialized, and read-only while param_slice is alive.
140        unsafe {
141            // Calculate new residuals and jacobian
142            let param_slice = std::slice::from_raw_parts(x.as_ptr(), x.nrows());
143            objective.residuals_and_jacobian_into(param_slice, Rbuf.as_mut(), Jbuf.as_mut());
144            // dbg!(&Jbuf);
145        }
146
147        // Calculate current cost
148        let mut current_cost = Rbuf.squared_norm_l2() * R::from64(0.5);
149
150        // Check for invalid initial cost
151        if current_cost.is_nan() || current_cost.is_infinite() {
152            return MinimizationResult {
153                params: vec![],
154                fun: current_cost,
155                status: 2,
156                message: Some("Initial cost function is invalid (NaN or infinity)".to_string()),
157            };
158        }
159
160        // Calculate hessian and gradient approximation to form new linear system problem
161        matmul(
162            &mut JtJ,
163            Accum::Replace,
164            Jbuf.transpose(),
165            &Jbuf,
166            R::one(),
167            Par::Seq,
168        );
169        matmul(
170            &mut Jtr,
171            Accum::Replace,
172            Jbuf.transpose(),
173            Rbuf.as_mat(),
174            R::one(),
175            Par::Seq,
176        );
177
178        // Check gradient norm
179        let gradient_norm = Jtr.norm_l2();
180        if gradient_norm < self.minimum_gradient_norm {
181            return MinimizationResult {
182                params: Vec::from_iter(x.iter().copied()),
183                fun: current_cost,
184                status: 6,
185                message: Some("Gradient norm too small - optimization stagnated".to_string()),
186            };
187        }
188
189        // Start lambda scaled to problem if not specified
190        let mut lambda = match self.initial_lambda {
191            None => {
192                let mut iter = JtJ.diagonal().column_vector().iter();
193                let mut l = iter.next().expect("Empty Jacobian");
194                for cand_l in iter {
195                    if cand_l > l {
196                        l = cand_l;
197                    }
198                }
199                *l
200            }
201            Some(l) => l,
202        };
203
204        for _ in 0..self.max_iterations {
205            // Check if lambda has grown too large (ill-conditioned problem)
206            if lambda > self.maximum_lambda {
207                return MinimizationResult {
208                    params: Vec::from_iter(x.iter().copied()),
209                    fun: current_cost,
210                    status: 3,
211                    message: Some(
212                        "Lambda exceeded maximum threshold - problem is ill-conditioned"
213                            .to_string(),
214                    ),
215                };
216            }
217
218            // Damp hessian approximation
219            for j in JtJ.diagonal_mut().column_vector_mut().iter_mut() {
220                *j += lambda * j.clamp(R::epsilon() * R::from64(10.0), R::from64(10.0));
221            }
222
223            // Check JTJ for NaN or infinity values
224            let mut has_invalid_jtj = false;
225            for col in JtJ.col_iter() {
226                for elem in col.iter() {
227                    if elem.is_nan() || elem.is_infinite() || elem.is_subnormal() {
228                        has_invalid_jtj = true;
229                    }
230                }
231            }
232            if has_invalid_jtj {
233                // println!("Step rejected due to NaN or infinity in JtJ matrix.");
234                // Recalculate JtJ to reset it to the undamped version before increasing lambda
235                matmul(
236                    &mut JtJ,
237                    Accum::Replace,
238                    Jbuf.transpose(),
239                    &Jbuf,
240                    R::one(),
241                    Par::Seq,
242                );
243                lambda *= self.lambda_increase_factor;
244                // println!("New lambda: {}", lambda);
245                continue;
246            }
247
248            // // check condition of JTJ
249            // let s = JtJ.svd().unwrap();
250            // let max_s = s.S().column_vector().iter().next().unwrap();
251            // let min_s = s.S().column_vector().iter().last().unwrap();
252            // if RealScalar::abs(*min_s) < R::from64(1e-12) { // Avoid division by a near-zero number
253            //     dbg!("Infinity");
254            // } else {
255            //     dbg!(max_s.to64() / min_s.to64());
256            // }
257
258            // Solve for delta_x
259            let delta_x = match JtJ.llt(faer::Side::Lower) {
260                Ok(llt) => llt.solve(&Jtr),
261                Err(_) => {
262                    // Step rejected
263                    // println!("Step rejected due to invalid llt: {}", e);
264                    // matmul(&mut JtJ, Accum::Replace, &Jbuf.transpose(), &Jbuf, R::one(), Par::Seq);
265                    // lambda *= self.lambda_increase_factor;
266                    // println!("New lambda: {}", lambda);
267                    // continue;
268                    JtJ.lblt(faer::Side::Lower).solve(&Jtr)
269                }
270            };
271            // TODO: If this fails; fail step
272            // let delta_x = llt.solve(&Jtr); // TODO: remove unnecessary allocations using scratch
273            // space: https://faer.veganb.tw/docs/dense-linalg/linsolve/
274
275            // Calculate x_new
276            let x_new = &x - delta_x.col(0); // TODO: remove allocation
277
278            // Evaluate cost at x_new
279            // Safety: x_new is non null, initialized, and read-only while param_slice is alive.
280            unsafe {
281                let param_slice = std::slice::from_raw_parts(x_new.as_ptr(), x_new.nrows());
282                objective.residuals_into(param_slice, Rbuf.as_mut());
283            }
284            let new_cost = Rbuf.squared_norm_l2() * R::from64(0.5);
285
286            // Check for invalid cost
287            if new_cost.is_nan() || new_cost.is_infinite() {
288                return MinimizationResult {
289                    params: Vec::from_iter(x.iter().copied()),
290                    fun: current_cost,
291                    status: 4,
292                    message: Some("Cost function became invalid (NaN or infinity)".to_string()),
293                };
294            }
295
296            // Calculate gain ratio for proper LM updates
297            let actual_reduction = current_cost - new_cost;
298
299            // Simplified predicted reduction: step_norm^2 * (lambda + gradient_norm)
300            let step_norm = delta_x.norm_l2();
301            let gradient_norm = Jtr.norm_l2();
302            let predicted_reduction = step_norm * step_norm * (lambda + gradient_norm);
303
304            let gain_ratio = if predicted_reduction < R::from64(1e-14)
305                || predicted_reduction.is_nan()
306                || predicted_reduction.is_infinite()
307            {
308                R::from64(-1.0) // Treat as bad step
309            } else {
310                actual_reduction / predicted_reduction
311            };
312
313            // Accept step if gain ratio is positive
314            if gain_ratio > R::zero() {
315                // Step accepted
316                x = x_new;
317                if new_cost.is_close_with_tolerance(current_cost, self.diff_tol_r, self.diff_tol_a)
318                {
319                    // if RealScalar::abs(new_cost - current_cost) <= self.diff_tol_a + self.diff_tol_r * RealScalar::abs(current_cost) {
320                    return MinimizationResult {
321                        params: Vec::from_iter(x.iter().copied()),
322                        fun: new_cost,
323                        status: 7,
324                        message: Some(
325                            "Terminated because lack of sufficient cost improvement.".to_string(),
326                        ),
327                    };
328                }
329                current_cost = new_cost;
330
331                // Update lambda based on gain ratio quality
332                if gain_ratio > R::from64(0.75) {
333                    // Very good step, decrease damping aggressively
334                    lambda /= self.lambda_decrease_factor;
335                } else if gain_ratio > R::from64(0.25) {
336                    // Moderate step, decrease damping moderately
337                    lambda /= R::from64(2.0);
338                }
339                // For gain_ratio <= 0.25, keep lambda unchanged
340
341                lambda = if lambda < self.minimum_lambda {
342                    self.minimum_lambda
343                } else {
344                    lambda
345                };
346
347                // Check for invalid parameters
348                for param in x.iter() {
349                    if param.is_nan() || param.is_infinite() {
350                        return MinimizationResult {
351                            params: Vec::from_iter(x.iter().copied()),
352                            fun: current_cost,
353                            status: 5,
354                            message: Some(
355                                "Parameters became invalid (NaN or infinity)".to_string(),
356                            ),
357                        };
358                    }
359                }
360
361                // Safety: x is non null, initialized, and read-only while param_slice is alive.
362                unsafe {
363                    // Calculate new residuals and jacobian
364                    let param_slice = std::slice::from_raw_parts(x.as_ptr(), x.nrows());
365                    objective.residuals_and_jacobian_into(
366                        param_slice,
367                        Rbuf.as_mut(),
368                        Jbuf.as_mut(),
369                    );
370                    // dbg!(&Jbuf);
371                }
372
373                // Calculate hessian and gradient approximation to form new linear system problem
374                matmul(
375                    &mut JtJ,
376                    Accum::Replace,
377                    Jbuf.transpose(),
378                    &Jbuf,
379                    R::one(),
380                    Par::Seq,
381                );
382                matmul(
383                    &mut Jtr,
384                    Accum::Replace,
385                    Jbuf.transpose(),
386                    Rbuf.as_mat(),
387                    R::one(),
388                    Par::Seq,
389                );
390
391                // Check gradient norm after step acceptance
392                let gradient_norm = Jtr.norm_l2();
393                if gradient_norm < self.minimum_gradient_norm {
394                    return MinimizationResult {
395                        params: Vec::from_iter(x.iter().copied()),
396                        fun: current_cost,
397                        status: 6,
398                        message: Some(
399                            "Gradient norm too small - optimization stagnated".to_string(),
400                        ),
401                    };
402                }
403            } else {
404                // Step rejected - poor gain ratio
405                matmul(
406                    &mut JtJ,
407                    Accum::Replace,
408                    Jbuf.transpose(),
409                    &Jbuf,
410                    R::one(),
411                    Par::Seq,
412                );
413
414                // Update lambda based on how bad the step was
415                if gain_ratio < R::from64(-0.5) {
416                    // Very bad step, increase damping aggressively
417                    lambda *= self.lambda_increase_factor * R::from64(2.0);
418                } else {
419                    // Moderately bad step, increase damping normally
420                    lambda *= self.lambda_increase_factor;
421                }
422            }
423
424            // Check for convergence
425            if delta_x.norm_l2() < self.tolerance {
426                let params_out = Vec::from_iter(x.iter().copied());
427                return MinimizationResult::simple_success(params_out, current_cost);
428            }
429        }
430
431        // TODO: better result converying the failure
432        MinimizationResult {
433            params: vec![],
434            fun: R::from64(1.0),
435            status: 1,
436            message: None,
437        }
438    }
439}