scirs2_optimize/roots_krylov.rs

use crate::error::OptimizeResult;
use crate::result::OptimizeResults;
use crate::roots::Options;
use scirs2_core::ndarray::{s, Array1, Array2, ArrayBase, Data, Ix1};

/// Implements the Krylov method accelerated by GMRES for root finding
///
/// This method uses Krylov subspace techniques to accelerate the convergence
/// of root-finding algorithms, particularly for large-scale systems. It combines
/// the Levenberg-Marquardt approach with Krylov subspace methods for the linear solve.
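///
/// # Example
///
/// A minimal usage sketch (hypothetical caller code; assumes `Options`
/// implements `Default`, matching the `unwrap_or` fallbacks below):
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Array1, Array2};
///
/// // f(x, y) = (x^2 + y^2 - 4, x - y) has a root at (sqrt(2), sqrt(2))
/// let f = |x: &[f64]| Array1::from(vec![x[0] * x[0] + x[1] * x[1] - 4.0, x[0] - x[1]]);
/// let x0 = array![1.0, 1.0];
/// // Pass `None` to fall back to the forward-difference Jacobian
/// let res = root_krylov(f, &x0, None::<fn(&[f64]) -> Array2<f64>>, &Options::default()).unwrap();
/// assert!(res.success);
/// ```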
#[allow(dead_code)]
pub fn root_krylov<F, J, S>(
    func: F,
    x0: &ArrayBase<S, Ix1>,
    jacobian_fn: Option<J>,
    options: &Options,
) -> OptimizeResult<OptimizeResults<f64>>
where
    F: Fn(&[f64]) -> Array1<f64>,
    J: Fn(&[f64]) -> Array2<f64>,
    S: Data<Elem = f64>,
{
    // Get options or use defaults
    let xtol = options.xtol.unwrap_or(1e-8);
    let ftol = options.ftol.unwrap_or(1e-8);
    let maxfev = options.maxfev.unwrap_or(100 * x0.len());
    let eps = options.eps.unwrap_or(1e-8);

    // Initialize variables
    let n = x0.len();
    let mut x = x0.to_owned();
    let mut f = func(x.as_slice().unwrap());
    let mut nfev = 1;
    let mut njev = 0;

    // Function to compute numerical Jacobian
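    // using one-sided forward differences, J[i][j] ≈ (f_i(x + eps * e_j) - f_i(x)) / eps,
    // at a cost of one extra function evaluation per column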
    let compute_numerical_jac =
        |x_values: &[f64], f_values: &Array1<f64>| -> (Array2<f64>, usize) {
            let mut jac = Array2::zeros((f_values.len(), x_values.len()));
            let mut count = 0;

            for j in 0..x_values.len() {
                let mut x_h = Vec::from(x_values);
                x_h[j] += eps;
                let f_h = func(&x_h);
                count += 1;

                for i in 0..f_values.len() {
                    jac[[i, j]] = (f_h[i] - f_values[i]) / eps;
                }
            }

            (jac, count)
        };

    // Function to get Jacobian (either analytical or numerical)
    let get_jacobian = |x_values: &[f64],
                        f_values: &Array1<f64>,
                        jac_fn: &Option<J>|
     -> (Array2<f64>, usize, usize) {
        match jac_fn {
            Some(func) => {
                let j = func(x_values);
                (j, 0, 1)
            }
            None => {
                let (j, count) = compute_numerical_jac(x_values, f_values);
                (j, count, 0)
            }
        }
    };

    // Compute initial Jacobian
    let (mut jac, nfev_inc, njev_inc) = get_jacobian(x.as_slice().unwrap(), &f, &jacobian_fn);
    nfev += nfev_inc;
    njev += njev_inc;

    // Main iteration loop
    let mut iter = 0;
    let mut converged = false;

    // Levenberg-Marquardt damping parameter
    let mut lambda = 0.01;
    let lambda_adjustment = 10.0;
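    // Each accepted step decreases lambda (trusting the Gauss-Newton model more);
    // each rejected step increases it (falling back toward gradient descent)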

    while nfev < maxfev {
        // Check if we've converged in function values
        let f_norm = f.iter().map(|&fi| fi.powi(2)).sum::<f64>().sqrt();
        if f_norm < ftol {
            converged = true;
            break;
        }

        // Solve for the Levenberg-Marquardt step with a GMRES-accelerated approach:
        // instead of forming and factoring (J^T J + λI) δ = -J^T f directly,
        // the regularized system is solved iteratively in a Krylov subspace

        // Limit the Krylov subspace dimension
        let max_krylov_dim = std::cmp::min(n, 20);

        // Run a modified GMRES method with Levenberg-Marquardt damping
        let delta = gmres_solve(&jac, &(-&f), lambda, max_krylov_dim);

        // Apply the step
        let x_new = &x + &delta;

        // Evaluate function at the new point
        let f_new = func(x_new.as_slice().unwrap());
        nfev += 1;

        let f_new_norm = f_new.iter().map(|&fi| fi.powi(2)).sum::<f64>().sqrt();

        // Adaptive Levenberg-Marquardt strategy
        if f_new_norm < f_norm {
            // Step decreased the residual: accept it and decrease damping
            lambda /= lambda_adjustment;

            // Update variables for next iteration
            x = x_new;
            f = f_new;

            // Update Jacobian for next iteration
            let (new_jac, nfev_delta, njev_delta) =
                get_jacobian(x.as_slice().unwrap(), &f, &jacobian_fn);
            jac = new_jac;
            nfev += nfev_delta;
            njev += njev_delta;

            // Check convergence on parameters; only accepted steps count,
            // so a tiny rejected step cannot trigger false convergence
            let step_norm = delta.iter().map(|&di| di.powi(2)).sum::<f64>().sqrt();
            let x_norm = x.iter().map(|&xi| xi.powi(2)).sum::<f64>().sqrt();
            if step_norm < xtol * (1.0 + x_norm) {
                converged = true;
                break;
            }
        } else {
            // Step increased the residual: reject it, increase damping, and retry
            lambda *= lambda_adjustment;
        }

        iter += 1;
    }

    // Create and return result
    let mut result = OptimizeResults::default();
    result.x = x;
    result.fun = f.iter().map(|&fi| fi.powi(2)).sum::<f64>();

    // Store the final Jacobian
    let (jac_vec, _) = jac.into_raw_vec_and_offset();
    result.jac = Some(jac_vec);

    result.nfev = nfev;
    result.njev = njev;
    result.nit = iter;
    result.success = converged;

    if converged {
        result.message = "Root finding converged successfully".to_string();
    } else {
        result.message = "Maximum number of function evaluations reached".to_string();
    }

    Ok(result)
}

/// Implements a simplified GMRES (Generalized Minimal Residual) method
/// for the Levenberg-Marquardt-regularized normal equations (A^T A + λI) x = A^T b
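///
/// Builds an Arnoldi basis V for the regularized operator B = A^T A + λI,
/// minimizes ||β e_1 - H y||_2 over the Krylov subspace, and maps the
/// coefficients back to the original space as x = V y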
#[allow(dead_code)]
fn gmres_solve(a: &Array2<f64>, b: &Array1<f64>, lambda: f64, maxiter: usize) -> Array1<f64> {
    let (_m, n) = a.dim();

    // Regularized system: solve (A^T A + λI) x = A^T b
    // We run GMRES on this system directly

    // Initialize solution vector
    let mut x = Array1::zeros(n);

    // Initial residual of the regularized system: x is zero, so r = A^T b
    let r = a.t().dot(b);
    let r_norm_initial = r.iter().map(|&ri| ri.powi(2)).sum::<f64>().sqrt();

    // If the initial residual is small, return the zero solution
    if r_norm_initial < 1e-10 {
        return x;
    }

    // Arnoldi basis vectors (orthonormal basis for the Krylov subspace)
    let mut v = Vec::with_capacity(maxiter + 1);

    // First basis vector: v[0] = r / ||r||
    let mut v0 = r.clone();
    v0 /= r_norm_initial;
    v.push(v0);

    // Hessenberg matrix (stores the projection of the regularized operator
    // onto the Krylov subspace)
    let mut h = Array2::zeros((maxiter + 1, maxiter));

    // Construct the Krylov subspace and Hessenberg matrix
    for j in 0..maxiter {
        // Apply the regularized operator: w = (A^T A + λI) * v[j]
        let w = a.dot(&v[j]);
        let mut w_regularized = a.t().dot(&w);

        // Add the regularization term λ * v[j]
        for i in 0..n {
            w_regularized[i] += lambda * v[j][i];
        }

        // Orthogonalize against previous vectors (Modified Gram-Schmidt)
        for i in 0..=j {
            h[[i, j]] = v[i].dot(&w_regularized);
            for k in 0..n {
                w_regularized[k] -= h[[i, j]] * v[i][k];
            }
        }

        // Compute the norm of the orthogonalized vector
        let w_norm = w_regularized
            .iter()
            .map(|&wi| wi.powi(2))
            .sum::<f64>()
            .sqrt();

        // A (near-)zero norm means the new vector is linearly dependent:
        // the Krylov subspace is invariant and we can stop early
        if w_norm < 1e-10 {
            break;
        }

        // Add the next basis vector: v[j+1] = w / ||w||
        h[[j + 1, j]] = w_norm;
        v.push(w_regularized / w_norm);

        // Stop once the subspace spans the whole space
        if j >= n - 1 {
            break;
        }
    }

    // Solve the small least-squares problem in the Krylov subspace
    let k = v.len() - 1; // Actual dimension of the Krylov subspace

    // Form the right-hand side ||r||_2 * e_1 of the projected system
    let mut g = Array1::zeros(k + 1);
    g[0] = r_norm_initial;

    // Extract the (k+1) x k Hessenberg block that was actually filled
    let h_ls = h.slice(s![0..k + 1, 0..k]).to_owned();

    // Solve min_y ||H y - g||_2 via the normal equations
    let h_t = h_ls.t();
    let normal_matrix = h_t.dot(&h_ls);
    let rhs = h_t.dot(&g);
    let y = solve_normal_equations(&normal_matrix, &rhs);

    // Form the solution in the original space: x = V * y
    for j in 0..k {
        for i in 0..n {
            x[i] += y[j] * v[j][i];
        }
    }

    x
}

/// Solves a symmetric positive definite system `a * x = b` (here, the normal
/// equations of the projected least-squares problem) by Cholesky factorization
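///
/// Factors A = L * L^T, then solves L * y = b by forward substitution and
/// L^T * x = y by backward substitution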
#[allow(dead_code)]
fn solve_normal_equations(a: &Array2<f64>, b: &Array1<f64>) -> Array1<f64> {
    let n = a.dim().0;
    let mut x = Array1::zeros(n);

    // Cholesky decomposition A = L * L^T; the normal matrix is symmetric
    // positive semi-definite, and a small diagonal shift keeps the pivots positive
    let mut l = Array2::zeros((n, n));

    for i in 0..n {
        for j in 0..=i {
            let mut sum = a[[i, j]];

            for k in 0..j {
                sum -= l[[i, k]] * l[[j, k]];
            }

            if i == j {
                l[[i, j]] = (sum + 1e-10).sqrt(); // Small shift for stability
            } else {
                l[[i, j]] = sum / l[[j, j]];
            }
        }
    }

    // Forward substitution to solve L * y = b
    let mut y = Array1::zeros(n);
    for i in 0..n {
        let mut sum = b[i];
        for j in 0..i {
            sum -= l[[i, j]] * y[j];
        }
        y[i] = sum / l[[i, i]];
    }

    // Backward substitution to solve L^T * x = y
    for i in (0..n).rev() {
        let mut sum = y[i];
        for j in (i + 1)..n {
            sum -= l[[j, i]] * x[j];
        }
        x[i] = sum / l[[i, i]];
    }

    x
}
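
// A minimal sanity check (a sketch: assumes `Options` implements `Default`,
// matching the `unwrap_or` fallbacks above, and that `scirs2_core` re-exports
// ndarray's `array!` macro alongside `s!`).
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn finds_root_of_simple_system() {
        // f(x, y) = (x^2 - 1, y - 2) has roots at (±1, 2)
        let f = |x: &[f64]| Array1::from(vec![x[0] * x[0] - 1.0, x[1] - 2.0]);
        let x0 = array![2.0, 0.0];
        let res = root_krylov(f, &x0, None::<fn(&[f64]) -> Array2<f64>>, &Options::default())
            .unwrap();
        assert!(res.success);
        assert!((res.x[0] - 1.0).abs() < 1e-6);
        assert!((res.x[1] - 2.0).abs() < 1e-6);
    }
}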