//! See [`optimize`] for documentation on the Levenberg-Marquardt optimization algorithm.
#![no_std]

use nalgebra::{
    allocator::Allocator,
    constraint::{DimEq, ShapeConstraint},
    dimension::{DimMin, DimMinimum},
    storage::{ContiguousStorageMut, Storage},
    DefaultAllocator, Dim, DimName, Matrix, MatrixMN, RealField, Vector,
};
use num_traits::FromPrimitive;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Config<N> {
    pub max_iterations: usize,
    pub consecutive_divergence_limit: usize,
    pub initial_lambda: N,
    pub lambda_converge: N,
    pub lambda_diverge: N,
    pub threshold: N,
}

impl<N> Default for Config<N>
where
    N: FromPrimitive,
{
    fn default() -> Self {
        Self {
            max_iterations: 1000,
            consecutive_divergence_limit: 5,
            initial_lambda: N::from_f32(50.0)
                .expect("levenberg-marquardt vector and matrix type can't store 50.0"),
            lambda_converge: N::from_f32(0.8)
                .expect("levenberg-marquardt vector and matrix type can't store 0.8"),
            lambda_diverge: N::from_f32(2.0)
                .expect("levenberg-marquardt vector and matrix type can't store 2.0"),
            threshold: N::from_f32(0.0)
                .expect("levenberg-marquardt vector and matrix type can't store 0.0"),
        }
    }
}

/// Note that the differentials and the state vector are represented with column vectors.
/// This is atypical of the usual convention in mathematics, but it is done because nalgebra
/// is column-major. A nalgebra `Vector` is a column vector.
///
/// Make sure that you create your Jacobian such that it is several fixed-length
/// column vectors rather than several row vectors, as is usual. If you have already
/// computed it with row vectors, you can take the transpose.
///
/// It is recommended to make the number of columns dynamic unless you have a small fixed
/// number of data points.
///
/// `max_iterations` limits the number of times the initial guess will be updated.
///
/// `consecutive_divergence_limit` limits the number of times that lambda can consecutively
/// diverge from Gauss-Newton due to a failed improvement. Once the
/// solution is as good as possible, it will begin regressing to gradient descent. This
/// limit prevents it from wasting the remaining cycles of the algorithm.
///
/// `initial_lambda` defines the initial lambda value. As lambda grows higher,
/// Levenberg-Marquardt approaches gradient descent, which is better at converging to a distant
/// minimum. As lambda grows lower, Levenberg-Marquardt approaches Gauss-Newton, which allows faster
/// convergence close to the minimum. A lambda of `0.0` would imply that the step is purely based on the
/// Gauss-Newton approximation. Please do not set lambda to exactly `0.0`, or `lambda_diverge` will be
/// unable to increase lambda, since it does so through multiplication.
///
/// `lambda_converge` must be set to a value below `1.0`. On each iteration of Levenberg-Marquardt,
/// lambda is tried both as-is and multiplied by `lambda_converge`. If either of the two produces an
/// improvement, the better one becomes the new lambda. If neither is better than the
/// previous sum-of-squares, then lambda is multiplied by `lambda_diverge`.
///
/// `lambda_diverge` must be set to a value above `1.0`, and it is highly recommended to set it **above**
/// `lambda_converge^-1` (it will re-test an already-used lambda otherwise). On each iteration,
/// if the sum-of-squares regresses, then lambda is multiplied by `lambda_diverge` to move closer
/// to gradient descent in the hope that this will cause it to converge.
///
/// `threshold` is the point at which the average-of-squares is low enough that the algorithm can
/// terminate. This exists so that the algorithm can short-circuit and exit early if the
/// solution was easy to find. Set this to `0.0` if you want it to continue for all `max_iterations`.
/// You might do that if you always have a fixed amount of time per optimization, such as when
/// processing live video frames.
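///
/// For instance, a configuration that stops after at most 200 iterations, or once the mean square
/// of the residuals drops below `1e-8`, while keeping the default lambda schedule, might be built
/// like this (the numeric values are illustrative only; the block is not compiled as a doc-test):
///
/// ```ignore
/// let config = Config {
///     max_iterations: 200,
///     threshold: 1e-8f32,
///     ..Config::default()
/// };
/// ```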
///
/// `init` is the initial parameter guess. Make sure to set `init` close to the actual solution.
/// It is recommended to use a sample consensus algorithm to get a close initial approximation.
///
/// `normalize` allows the parameter guess to be normalized on each iteration. The guess can be pushed
/// into a slightly incorrect state on each iteration, and this can be used to correct it. One example
/// is an angle that exceeds `2 * pi`: it might technically be correct, but you want to wrap it back
/// around. This is also useful when a normal vector or unit quaternion is involved, since those need
/// to be kept normalized throughout the optimization procedure.
///
/// `residuals` must return the difference between the expected value and the output of the
/// function being optimized. This is returned as a matrix where the number of residuals (rows)
/// present in each column must correspond to the number of columns in each
/// Jacobian returned by `jacobians`.
///
/// `jacobians` is a function that takes in the current guess and produces all the Jacobian
/// matrices of the negative residuals with respect to the parameters. Each row should correspond to
/// a dimension of the parameter vector and each column should correspond to a row in the residual matrix.
/// You can pass in the Jacobians of as many residuals as you would like on each iteration,
/// so long as the residuals returned by `residuals` have the same number of residuals per column.
/// Only the Jacobians and the residuals are required to perform Levenberg-Marquardt optimization.
/// You may need to capture your observations in the closure to compute the Jacobian, but
/// they are not arguments, since they are constants to Levenberg-Marquardt.
///
/// `N` is the type of the data stored in the matrices (e.g. `f32`).
///
/// `P` is the number of parameter variables being optimized.
///
/// `S` is the number of samples used in optimization.
///
/// `J` is the number of residual rows per sample and the number of columns in each Jacobian.
///
/// `PS` is the nalgebra storage used for the parameter vector.
///
/// `RS` is the nalgebra storage used for the residual matrix.
///
/// `JS` is the nalgebra storage used for the Jacobian matrices.
///
/// `IJ` is the iterator over the Jacobian matrices of each sample.
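///
/// As a usage illustration, here is a minimal sketch of fitting `y = a * exp(-b * x)` to a handful
/// of samples. It is not compiled as a doc-test because the imports of `optimize` and `Config` from
/// this crate are elided; the sample data and the initial guess are assumptions for illustration only.
///
/// ```ignore
/// use nalgebra::{RowDVector, Vector2};
///
/// // Observations of y = 3 * exp(-0.5 * x) (assumed data for illustration).
/// let xs: Vec<f32> = vec![0.0, 0.5, 1.0, 1.5, 2.0, 3.0];
/// let ys: Vec<f32> = xs.iter().map(|&x| 3.0 * (-0.5 * x).exp()).collect();
///
/// let fit = optimize(
///     Config::default(),
///     // Initial guess for (a, b); a sample consensus estimate would normally go here.
///     Vector2::new(2.5f32, 0.4),
///     // No normalization is needed for this parameterization.
///     |guess: Vector2<f32>| guess,
///     // One residual row per sample (measured minus predicted), as a 1 x S matrix.
///     |guess: &Vector2<f32>| {
///         RowDVector::from_iterator(
///             xs.len(),
///             xs.iter().zip(&ys).map(|(&x, &y)| y - guess[0] * (-guess[1] * x).exp()),
///         )
///     },
///     // One 2 x 1 Jacobian column per sample: the derivative of the model
///     // (equivalently, of the negative residual) with respect to (a, b).
///     |guess: &Vector2<f32>| {
///         // Copy the guess so the returned iterator does not borrow the argument.
///         let g = *guess;
///         xs.iter().map(move |&x| {
///             let e = (-g[1] * x).exp();
///             Vector2::new(e, -g[0] * x * e)
///         })
///     },
/// );
/// // `fit` should now be close to (3.0, 0.5).
/// ```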
pub fn optimize<N, P, S, J, PS, RS, JS, IJ>(
    config: Config<N>,
    init: Vector<N, P, PS>,
    normalize: impl Fn(Vector<N, P, PS>) -> Vector<N, P, PS>,
    residuals: impl Fn(&Vector<N, P, PS>) -> Matrix<N, J, S, RS>,
    jacobians: impl Fn(&Vector<N, P, PS>) -> IJ,
) -> Vector<N, P, PS>
where
    N: RealField + FromPrimitive,
    P: DimMin<P> + DimName,
    S: Dim,
    J: DimName,
    PS: ContiguousStorageMut<N, P> + Clone,
    RS: Storage<N, J, S>,
    JS: Storage<N, P, J>,
    IJ: Iterator<Item = Matrix<N, P, J, JS>>,
    DefaultAllocator: Allocator<N, J, P>,
    DefaultAllocator: Allocator<N, P, P>,
    DefaultAllocator: Allocator<N, P, Buffer = PS>,
    ShapeConstraint: DimEq<DimMinimum<P, P>, P>,
{
    let mut lambda = config.initial_lambda;
    let mut guess = init;
    let mut res = residuals(&guess);
    let mut sum_of_squares = res.norm_squared();
    let mut consecutive_divergences = 0;
    let total = N::from_usize(res.len())
        .expect("there were more residuals in the matrix than could be represented by the type");

    for _ in 0..config.max_iterations {
        // Next step lambda.
        let smaller_lambda = lambda * config.lambda_converge;

        // Iterate through all the Jacobians to extract the approximate Hessian and the gradients.
        let (hessian, gradients) = jacobians(&guess).zip(res.column_iter()).fold(
            (nalgebra::zero(), nalgebra::zero()),
            |(hessian, gradients): (MatrixMN<N, P, P>, Vector<N, P, PS>), (jacobian, res)| {
                (
                    hessian + &jacobian * jacobian.transpose(),
                    gradients + &jacobian * res,
                )
            },
        );

        // Get a tuple of the lambda, guess, residuals, and sum-of-squares.
        // Returns an option because it may not be possible to compute the inverse.
        let lam_ges_res_sum = |lam| {
            // Compute the damped Hessian H + λ*diag(H), where H is the Gauss-Newton
            // approximation accumulated above.
            let mut hessian_lambda_diag = hessian.clone();
            let new_diag = hessian_lambda_diag.map_diagonal(|n| n * (lam + N::one()));
            hessian_lambda_diag.set_diagonal(&new_diag);

            // Invert the damped Hessian and solve for the parameter delta.
            let delta = hessian_lambda_diag
                .try_inverse()
                .map(|inv_jjl| inv_jjl * &gradients);

            // Compute the new guess, residuals, and sum-of-squares.
            let vars = delta.map(|delta| {
                let ges = normalize(&guess + delta);
                let res = residuals(&ges);
                let sum = res.norm_squared();
                (lam, ges, res, sum)
            });

            // If the sum-of-squares is infinite or NaN it shouldn't be allowed through.
            vars.filter(|vars| vars.3.is_finite())
        };

        // Select the vars that minimize the sum-of-squares the most.
        let new_vars = match (lam_ges_res_sum(smaller_lambda), lam_ges_res_sum(lambda)) {
            (Some(s_vars), Some(o_vars)) => {
                Some(if s_vars.3 < o_vars.3 { s_vars } else { o_vars })
            }
            (Some(vars), None) | (None, Some(vars)) => Some(vars),
            (None, None) => None,
        };

        if let Some((n_lam, n_ges, n_res, n_sum)) = new_vars {
            // We didn't see a decrease in the new state.
            if n_sum > sum_of_squares {
                // Increase lambda and go to the next iteration. With `lambda_diverge` above
                // `lambda_converge^-1`, the next two tested lambdas differ from the current ones.
                lambda *= config.lambda_diverge;
                consecutive_divergences += 1;
            } else {
                // There was a decrease, so update everything.
                lambda = n_lam;
                guess = n_ges;
                res = n_res;
                sum_of_squares = n_sum;
                consecutive_divergences = 0;
            }
        } else {
            // We were unable to take the inverse, so increase lambda in the hope that it may
            // cause the matrix to become invertible.
            lambda *= config.lambda_diverge;
            consecutive_divergences += 1;
        }

        // Terminate early if we hit the consecutive divergence limit.
        if consecutive_divergences == config.consecutive_divergence_limit {
            break;
        }

        // We can terminate early if the average of squares is below the threshold.
        if sum_of_squares < config.threshold * total {
            break;
        }
    }
    guess
}
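
// What follows is a small sanity check rather than part of the public API. The data points,
// initial guess, and tolerance are illustrative assumptions: four samples that lie exactly on
// y = 2x + 1 are fitted with fixed-size matrices, so the optimizer should recover the slope and
// intercept almost exactly.
#[cfg(test)]
mod tests {
    use super::*;
    use nalgebra::{RowVector4, Vector2};

    #[test]
    fn fits_line_through_exact_points() {
        // Samples lying exactly on y = 2x + 1 (assumed data for this test).
        let xs = [0.0f32, 1.0, 2.0, 3.0];
        let ys = [1.0f32, 3.0, 5.0, 7.0];

        // Residuals: one column per sample, containing measured minus predicted.
        let residuals = move |guess: &Vector2<f32>| {
            RowVector4::<f32>::from_fn(|_, i| ys[i] - (guess[0] * xs[i] + guess[1]))
        };

        // Jacobians: one 2x1 column per sample, holding the derivative of the model
        // (equivalently, of the negative residual) with respect to (slope, intercept),
        // which is simply [x, 1].
        let jacobians =
            move |_guess: &Vector2<f32>| (0..xs.len()).map(move |i| Vector2::new(xs[i], 1.0));

        let fit = optimize(
            Config::default(),
            Vector2::new(0.0f32, 0.0),
            // No normalization is needed for this parameterization.
            |guess: Vector2<f32>| guess,
            residuals,
            jacobians,
        );

        assert!((fit - Vector2::new(2.0, 1.0)).norm() < 1e-3);
    }
}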