// qudit_inst/numerical/minimizers/lm.rs
1#![allow(non_snake_case)] // A lot of math in here, okay to be flexible with var names.
2
3use crate::numerical::Function;
4use crate::numerical::Jacobian;
5use crate::numerical::MinimizationAlgorithm;
6use crate::numerical::MinimizationResult;
7use crate::numerical::ProvidesJacobian;
8use crate::numerical::ResidualFunction;
9use faer::linalg::solvers::Solve;
10use faer::{Accum, Col, Mat, Par, linalg::matmul::matmul};
11use qudit_core::RealScalar;
12
/// Levenberg-Marquardt (LM) optimization algorithm.
///
/// This struct implements the Levenberg-Marquardt algorithm for non-linear
/// least squares problems. It combines the Gauss-Newton algorithm with
/// gradient descent by damping the normal equations with an adaptive
/// parameter `lambda`: large `lambda` behaves like gradient descent, small
/// `lambda` behaves like Gauss-Newton.
#[derive(Clone)]
pub struct LM<R: RealScalar> {
    /// Maximum number of iterations.
    pub max_iterations: usize,
    /// Tolerance for convergence (change in parameters).
    pub tolerance: R,
    /// Initial damping parameter. When `None`, a starting value is derived
    /// from the diagonal of the initial `J^T J`.
    pub initial_lambda: Option<R>,
    /// Initial lambda scaling factor.
    pub tau: R,
    /// Multiplier for lambda when a step is rejected.
    pub lambda_increase_factor: R,
    /// Divisor for lambda when a step is accepted.
    pub lambda_decrease_factor: R,
    /// Lower bound for lambda - lambda is clamped up to this floor after a
    /// decrease, so damping never vanishes entirely.
    pub minimum_lambda: R,
    /// Maximum allowed lambda value - if exceeded, optimization fails
    pub maximum_lambda: R,
    /// Minimum gradient norm - if gradient becomes smaller, optimization fails
    pub minimum_gradient_norm: R,
    /// Absolute tolerance for cost difference - terminate if cost improvement is too small
    pub diff_tol_a: R,
    /// Relative tolerance for cost difference - terminate if cost improvement is too small relative to current cost
    pub diff_tol_r: R,
}
43
44impl<R: RealScalar> Default for LM<R> {
45 fn default() -> Self {
46 Self {
47 max_iterations: 75,
48 tolerance: R::from64(1e-8),
49 initial_lambda: None,
50 tau: R::from64(1e-3),
51 lambda_increase_factor: R::from64(2.0),
52 lambda_decrease_factor: R::from64(3.0),
53 minimum_lambda: R::from64(10.0) * R::epsilon(),
54 maximum_lambda: R::from64(1e12),
55 minimum_gradient_norm: R::from64(10.0) * R::epsilon(),
56 diff_tol_a: R::from64(1e-10),
57 diff_tol_r: R::from64(1e-6),
58 }
59 }
60}
61
62impl<R, P> MinimizationAlgorithm<R, P> for LM<R>
63where
64 R: RealScalar,
65 P: ProvidesJacobian<R>,
66{
67 type Func = P::Jacobian;
68
69 fn initialize(&self, problem: &P) -> P::Jacobian {
70 problem.build_jacobian()
71 }
72
73 /// Reference: <https://arxiv.org/pdf/1201.5885>
74 /// Reference: <https://scispace.com/pdf/the-levenberg-marquardt-algorithm-implementation-and-theory-u1ziue6l3q.pdf>
75 fn minimize(&self, objective: &mut P::Jacobian, x0: &[R]) -> MinimizationResult<R> {
76 let mut x = Col::from_fn(x0.len(), |i| x0[i]);
77 let mut Rbuf = objective.allocate_residual();
78 let mut Jbuf = objective.allocate_jacobian();
79 let mut JtJ: Mat<R> = Mat::zeros(objective.num_params(), objective.num_params());
80 let mut Jtr: Mat<R> = Mat::zeros(objective.num_params(), 1);
81
82 // /////// FINITE ANALYSIS TEST
83 // unsafe {
84 // // Calculate new residuals and jacobian
85 // let param_slice = std::slice::from_raw_parts(x.as_ptr(), x.nrows());
86 // objective.residuals_and_jacobian_into(param_slice, Rbuf.as_mut(), Jbuf.as_mut());
87 // }
88
89 // // Finite difference test for Jacobian correctness
90 // let epsilon = R::from64(1e-6); // Small perturbation
91 // let mut Rbuf_perturbed = objective.allocate_residual();
92
93 // println!("\n--- Finite Difference Jacobian Test ---");
94 // for j in 0..n_params {
95 // let mut x_perturbed = x.clone(); // Copy x
96 // // Perturb the j-th parameter
97 // x_perturbed[j] += epsilon;
98
99 // // Calculate residuals at x_perturbed
100 // unsafe {
101 // let param_slice = std::slice::from_raw_parts(x_perturbed.as_ptr(), x_perturbed.nrows());
102 // objective.residuals_into(param_slice, Rbuf_perturbed.as_mut());
103 // }
104
105 // // Calculate finite difference approximation for the j-th column of the Jacobian
106 // // J_fd_j = (R(x + e_j * epsilon) - R(x)) / epsilon
107 // let fd_jacobian_col_j = (Rbuf_perturbed.as_mat() - Rbuf.as_mat()) * Scale(R::one() / epsilon);
108
109 // // Get the analytical j-th column of the Jacobian
110 // let analytical_jacobian_col_j = Jbuf.col(j);
111
112 // // Compare: calculate the L2 norm of the difference
113 // let diff_norm = (fd_jacobian_col_j - analytical_jacobian_col_j.as_mat()).norm_l2();
114 // let analytical_norm = analytical_jacobian_col_j.norm_l2();
115
116 // println!(
117 // "Parameter {}: Diff Norm = {}, Analytical Norm = {}",
118 // j, diff_norm, analytical_norm
119 // );
120
121 // // Optional: Add a threshold for warning
122 // if analytical_norm > R::from64(1e-10) && diff_norm / analytical_norm > R::from64(1e-4) {
123 // // A relative error of 0.01% might be acceptable, adjust as needed
124 // println!(
125 // " WARNING: Large relative discrepancy. Relative error: {}",
126 // diff_norm / analytical_norm
127 // );
128 // } else if analytical_norm <= R::from64(1e-10) && diff_norm > R::from64(1e-8) {
129 // // If analytical norm is very small, check absolute difference
130 // println!(
131 // " WARNING: Large absolute discrepancy (analytical norm near zero). Absolute error: {}",
132 // diff_norm
133 // );
134 // }
135 // }
136 // println!("--- End Finite Difference Jacobian Test ---\n");
137 // ///////
138
139 // Safety: x is non null, initialized, and read-only while param_slice is alive.
140 unsafe {
141 // Calculate new residuals and jacobian
142 let param_slice = std::slice::from_raw_parts(x.as_ptr(), x.nrows());
143 objective.residuals_and_jacobian_into(param_slice, Rbuf.as_mut(), Jbuf.as_mut());
144 // dbg!(&Jbuf);
145 }
146
147 // Calculate current cost
148 let mut current_cost = Rbuf.squared_norm_l2() * R::from64(0.5);
149
150 // Check for invalid initial cost
151 if current_cost.is_nan() || current_cost.is_infinite() {
152 return MinimizationResult {
153 params: vec![],
154 fun: current_cost,
155 status: 2,
156 message: Some("Initial cost function is invalid (NaN or infinity)".to_string()),
157 };
158 }
159
160 // Calculate hessian and gradient approximation to form new linear system problem
161 matmul(
162 &mut JtJ,
163 Accum::Replace,
164 Jbuf.transpose(),
165 &Jbuf,
166 R::one(),
167 Par::Seq,
168 );
169 matmul(
170 &mut Jtr,
171 Accum::Replace,
172 Jbuf.transpose(),
173 Rbuf.as_mat(),
174 R::one(),
175 Par::Seq,
176 );
177
178 // Check gradient norm
179 let gradient_norm = Jtr.norm_l2();
180 if gradient_norm < self.minimum_gradient_norm {
181 return MinimizationResult {
182 params: Vec::from_iter(x.iter().copied()),
183 fun: current_cost,
184 status: 6,
185 message: Some("Gradient norm too small - optimization stagnated".to_string()),
186 };
187 }
188
189 // Start lambda scaled to problem if not specified
190 let mut lambda = match self.initial_lambda {
191 None => {
192 let mut iter = JtJ.diagonal().column_vector().iter();
193 let mut l = iter.next().expect("Empty Jacobian");
194 for cand_l in iter {
195 if cand_l > l {
196 l = cand_l;
197 }
198 }
199 *l
200 }
201 Some(l) => l,
202 };
203
204 for _ in 0..self.max_iterations {
205 // Check if lambda has grown too large (ill-conditioned problem)
206 if lambda > self.maximum_lambda {
207 return MinimizationResult {
208 params: Vec::from_iter(x.iter().copied()),
209 fun: current_cost,
210 status: 3,
211 message: Some(
212 "Lambda exceeded maximum threshold - problem is ill-conditioned"
213 .to_string(),
214 ),
215 };
216 }
217
218 // Damp hessian approximation
219 for j in JtJ.diagonal_mut().column_vector_mut().iter_mut() {
220 *j += lambda * j.clamp(R::epsilon() * R::from64(10.0), R::from64(10.0));
221 }
222
223 // Check JTJ for NaN or infinity values
224 let mut has_invalid_jtj = false;
225 for col in JtJ.col_iter() {
226 for elem in col.iter() {
227 if elem.is_nan() || elem.is_infinite() || elem.is_subnormal() {
228 has_invalid_jtj = true;
229 }
230 }
231 }
232 if has_invalid_jtj {
233 // println!("Step rejected due to NaN or infinity in JtJ matrix.");
234 // Recalculate JtJ to reset it to the undamped version before increasing lambda
235 matmul(
236 &mut JtJ,
237 Accum::Replace,
238 Jbuf.transpose(),
239 &Jbuf,
240 R::one(),
241 Par::Seq,
242 );
243 lambda *= self.lambda_increase_factor;
244 // println!("New lambda: {}", lambda);
245 continue;
246 }
247
248 // // check condition of JTJ
249 // let s = JtJ.svd().unwrap();
250 // let max_s = s.S().column_vector().iter().next().unwrap();
251 // let min_s = s.S().column_vector().iter().last().unwrap();
252 // if RealScalar::abs(*min_s) < R::from64(1e-12) { // Avoid division by a near-zero number
253 // dbg!("Infinity");
254 // } else {
255 // dbg!(max_s.to64() / min_s.to64());
256 // }
257
258 // Solve for delta_x
259 let delta_x = match JtJ.llt(faer::Side::Lower) {
260 Ok(llt) => llt.solve(&Jtr),
261 Err(_) => {
262 // Step rejected
263 // println!("Step rejected due to invalid llt: {}", e);
264 // matmul(&mut JtJ, Accum::Replace, &Jbuf.transpose(), &Jbuf, R::one(), Par::Seq);
265 // lambda *= self.lambda_increase_factor;
266 // println!("New lambda: {}", lambda);
267 // continue;
268 JtJ.lblt(faer::Side::Lower).solve(&Jtr)
269 }
270 };
271 // TODO: If this fails; fail step
272 // let delta_x = llt.solve(&Jtr); // TODO: remove unnecessary allocations using scratch
273 // space: https://faer.veganb.tw/docs/dense-linalg/linsolve/
274
275 // Calculate x_new
276 let x_new = &x - delta_x.col(0); // TODO: remove allocation
277
278 // Evaluate cost at x_new
279 // Safety: x_new is non null, initialized, and read-only while param_slice is alive.
280 unsafe {
281 let param_slice = std::slice::from_raw_parts(x_new.as_ptr(), x_new.nrows());
282 objective.residuals_into(param_slice, Rbuf.as_mut());
283 }
284 let new_cost = Rbuf.squared_norm_l2() * R::from64(0.5);
285
286 // Check for invalid cost
287 if new_cost.is_nan() || new_cost.is_infinite() {
288 return MinimizationResult {
289 params: Vec::from_iter(x.iter().copied()),
290 fun: current_cost,
291 status: 4,
292 message: Some("Cost function became invalid (NaN or infinity)".to_string()),
293 };
294 }
295
296 // Calculate gain ratio for proper LM updates
297 let actual_reduction = current_cost - new_cost;
298
299 // Simplified predicted reduction: step_norm^2 * (lambda + gradient_norm)
300 let step_norm = delta_x.norm_l2();
301 let gradient_norm = Jtr.norm_l2();
302 let predicted_reduction = step_norm * step_norm * (lambda + gradient_norm);
303
304 let gain_ratio = if predicted_reduction < R::from64(1e-14)
305 || predicted_reduction.is_nan()
306 || predicted_reduction.is_infinite()
307 {
308 R::from64(-1.0) // Treat as bad step
309 } else {
310 actual_reduction / predicted_reduction
311 };
312
313 // Accept step if gain ratio is positive
314 if gain_ratio > R::zero() {
315 // Step accepted
316 x = x_new;
317 if new_cost.is_close_with_tolerance(current_cost, self.diff_tol_r, self.diff_tol_a)
318 {
319 // if RealScalar::abs(new_cost - current_cost) <= self.diff_tol_a + self.diff_tol_r * RealScalar::abs(current_cost) {
320 return MinimizationResult {
321 params: Vec::from_iter(x.iter().copied()),
322 fun: new_cost,
323 status: 7,
324 message: Some(
325 "Terminated because lack of sufficient cost improvement.".to_string(),
326 ),
327 };
328 }
329 current_cost = new_cost;
330
331 // Update lambda based on gain ratio quality
332 if gain_ratio > R::from64(0.75) {
333 // Very good step, decrease damping aggressively
334 lambda /= self.lambda_decrease_factor;
335 } else if gain_ratio > R::from64(0.25) {
336 // Moderate step, decrease damping moderately
337 lambda /= R::from64(2.0);
338 }
339 // For gain_ratio <= 0.25, keep lambda unchanged
340
341 lambda = if lambda < self.minimum_lambda {
342 self.minimum_lambda
343 } else {
344 lambda
345 };
346
347 // Check for invalid parameters
348 for param in x.iter() {
349 if param.is_nan() || param.is_infinite() {
350 return MinimizationResult {
351 params: Vec::from_iter(x.iter().copied()),
352 fun: current_cost,
353 status: 5,
354 message: Some(
355 "Parameters became invalid (NaN or infinity)".to_string(),
356 ),
357 };
358 }
359 }
360
361 // Safety: x is non null, initialized, and read-only while param_slice is alive.
362 unsafe {
363 // Calculate new residuals and jacobian
364 let param_slice = std::slice::from_raw_parts(x.as_ptr(), x.nrows());
365 objective.residuals_and_jacobian_into(
366 param_slice,
367 Rbuf.as_mut(),
368 Jbuf.as_mut(),
369 );
370 // dbg!(&Jbuf);
371 }
372
373 // Calculate hessian and gradient approximation to form new linear system problem
374 matmul(
375 &mut JtJ,
376 Accum::Replace,
377 Jbuf.transpose(),
378 &Jbuf,
379 R::one(),
380 Par::Seq,
381 );
382 matmul(
383 &mut Jtr,
384 Accum::Replace,
385 Jbuf.transpose(),
386 Rbuf.as_mat(),
387 R::one(),
388 Par::Seq,
389 );
390
391 // Check gradient norm after step acceptance
392 let gradient_norm = Jtr.norm_l2();
393 if gradient_norm < self.minimum_gradient_norm {
394 return MinimizationResult {
395 params: Vec::from_iter(x.iter().copied()),
396 fun: current_cost,
397 status: 6,
398 message: Some(
399 "Gradient norm too small - optimization stagnated".to_string(),
400 ),
401 };
402 }
403 } else {
404 // Step rejected - poor gain ratio
405 matmul(
406 &mut JtJ,
407 Accum::Replace,
408 Jbuf.transpose(),
409 &Jbuf,
410 R::one(),
411 Par::Seq,
412 );
413
414 // Update lambda based on how bad the step was
415 if gain_ratio < R::from64(-0.5) {
416 // Very bad step, increase damping aggressively
417 lambda *= self.lambda_increase_factor * R::from64(2.0);
418 } else {
419 // Moderately bad step, increase damping normally
420 lambda *= self.lambda_increase_factor;
421 }
422 }
423
424 // Check for convergence
425 if delta_x.norm_l2() < self.tolerance {
426 let params_out = Vec::from_iter(x.iter().copied());
427 return MinimizationResult::simple_success(params_out, current_cost);
428 }
429 }
430
431 // TODO: better result converying the failure
432 MinimizationResult {
433 params: vec![],
434 fun: R::from64(1.0),
435 status: 1,
436 message: None,
437 }
438 }
439}