scirs2_optimize/roots_anderson.rs

use crate::error::OptimizeResult;
use crate::result::OptimizeResults;
use crate::roots::Options;
use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix1};

/// Implements the Anderson mixing method for root finding
///
/// Anderson mixing is an acceleration technique that combines the current and previous
/// iterates to speed up convergence. It can be viewed as a multisecant quasi-Newton
/// method that uses information from several previous iterations.
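///
/// # Example
///
/// A minimal sketch of calling the solver (assuming `Options` implements `Default`;
/// adjust the construction if it does not in your version of the crate):
///
/// ```ignore
/// use scirs2_core::ndarray::{Array1, Array2};
///
/// // f(x, y) = (x^2 - 2, y^2 - 3), with a root near (1.414, 1.732)
/// let func = |x: &[f64]| Array1::from_vec(vec![x[0] * x[0] - 2.0, x[1] * x[1] - 3.0]);
/// let x0 = Array1::from_vec(vec![1.0, 1.5]);
/// let options = Options::default();
/// let result = root_anderson(func, &x0, None::<fn(&[f64]) -> Array2<f64>>, &options).unwrap();
/// println!("root: {}, converged: {}", result.x, result.success);
/// ```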
#[allow(dead_code)]
pub fn root_anderson<F, J, S>(
    func: F,
    x0: &ArrayBase<S, Ix1>,
    _jacobian_fn: Option<J>,
    options: &Options,
) -> OptimizeResult<OptimizeResults<f64>>
where
    F: Fn(&[f64]) -> Array1<f64>,
    J: Fn(&[f64]) -> Array2<f64>,
    S: Data<Elem = f64>,
{
    // Get options or use defaults
    let xtol = options.xtol.unwrap_or(1e-8);
    let ftol = options.ftol.unwrap_or(1e-8);
    let maxfev = options.maxfev.unwrap_or(100 * x0.len());

    // Anderson specific parameters
    let m = options.m_anderson.unwrap_or(5).min(x0.len()); // Number of previous iterations to use
    let beta = options.beta_anderson.unwrap_or(0.5); // Damping parameter

    // Initialize variables
    let n = x0.len();
    let mut x = x0.to_owned();
    let mut f = func(x.as_slice().unwrap());
    let mut nfev = 1;

    // Storage for previous residuals and step differences
    let mut residuals: Vec<Array1<f64>> = Vec::with_capacity(m + 1);
    let mut steps: Vec<Array1<f64>> = Vec::with_capacity(m + 1);
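    // At each iteration these histories are combined as
    //     x_{k+1} = (1 - beta) * x_k
    //             + beta * [ (1 - sum(alpha)) * g(x_k) + sum_j alpha_j * g_j ]
    // where g(x) = x - f(x) is the fixed-point map (whose fixed points are the roots of
    // f(x) = 0), g_j are fixed-point steps saved from earlier iterations, and the
    // coefficients alpha come from a small least squares problem over the stored residuals.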

    // Main iteration loop
    let mut iter = 0;
    let mut converged = false;

    while iter < maxfev {
        // Check if we've converged in function values
        let f_norm = f.iter().map(|&fi| fi.powi(2)).sum::<f64>().sqrt();
        if f_norm < ftol {
            converged = true;
            break;
        }

        // Simple step (fixed-point iteration): x_{n+1} = x_n - f(x_n)
        let mut x_simple = x.clone();
        for i in 0..n {
            x_simple[i] = x[i] - f[i];
        }

        // Apply Anderson mixing if we have previous iterations
        let x_new = if iter >= 1 {
            // Add current residual to the history
            residuals.push(f.clone());

            // Use only m most recent iterations
            if residuals.len() > m {
                residuals.remove(0);
                steps.remove(0);
            }

            // Number of previous iterations to use (limited by available history)
            let mk = residuals.len() - 1;

            if mk >= 1 {
                // Construct the matrix of residual differences
                let mut df = Array2::zeros((n, mk));
                for j in 0..mk {
                    for i in 0..n {
                        df[[i, j]] = residuals[j + 1][i] - residuals[0][i];
                    }
                }

                // Solve the least squares problem min ||df * alpha + residuals[0]||
                // for the coefficients alpha that best combine previous iterations
                if let Some(alpha) = solve_least_squares(&df, &(-&residuals[0])) {
                    // Apply the mixing to compute the next iterate
                    let mut x_next = x.clone();

                    // Start with the weighted current fixed-point step
                    for i in 0..n {
                        x_next[i] = x_simple[i] * (1.0 - alpha.iter().sum::<f64>());
                    }

                    // Add weighted previous steps
                    for j in 0..mk {
                        for i in 0..n {
                            x_next[i] += alpha[j] * steps[j][i];
                        }
                    }

                    // Apply damping
                    for i in 0..n {
                        x_next[i] = (1.0 - beta) * x[i] + beta * x_next[i];
                    }

                    x_next
                } else {
                    // If the least squares problem fails, fall back to damped
                    // fixed-point iteration
                    let mut x_next = x.clone();
                    for i in 0..n {
                        x_next[i] = (1.0 - beta) * x[i] + beta * x_simple[i];
                    }
                    x_next
                }
            } else {
                // Use simple iteration if we don't have enough history
                let mut x_next = x.clone();
                for i in 0..n {
                    x_next[i] = (1.0 - beta) * x[i] + beta * x_simple[i];
                }
                x_next
            }
        } else {
            // For the first step, just use damped fixed-point iteration
            let mut x_next = x.clone();
            for i in 0..n {
                x_next[i] = (1.0 - beta) * x[i] + beta * x_simple[i];
            }
            x_next
        };

        // Save step for next iteration
        steps.push(x_simple);

        // Evaluate function at the new point
        let f_new = func(x_new.as_slice().unwrap());
        nfev += 1;

        // Check convergence on parameters
        let step_norm = (0..n)
            .map(|i| (x_new[i] - x[i]).powi(2))
            .sum::<f64>()
            .sqrt();
        let x_norm = (0..n).map(|i| x[i].powi(2)).sum::<f64>().sqrt();

        if step_norm < xtol * (1.0 + x_norm) {
            converged = true;
            x = x_new;
            f = f_new;
            break;
        }

        // Update variables for next iteration
        x = x_new;
        f = f_new;

        iter += 1;
    }

    // Create and return result
    let mut result = OptimizeResults::default();
    result.x = x;
    result.fun = f.iter().map(|&fi| fi.powi(2)).sum::<f64>();
    result.nfev = nfev;
    result.nit = iter;
    result.success = converged;

    if converged {
        result.message = "Root finding converged successfully".to_string();
    } else {
        result.message = "Maximum number of function evaluations reached".to_string();
    }

    Ok(result)
}

/// Solves the least squares problem min ||Ax - b|| using a QR decomposition
/// built from Householder reflections
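///
/// # Example
///
/// A small sketch (not compiled as a doctest) solving an overdetermined 3x2 system
/// in the least-squares sense:
///
/// ```ignore
/// use scirs2_core::ndarray::{Array1, Array2};
///
/// let a = Array2::from_shape_vec((3, 2), vec![1.0, 0.0, 0.0, 1.0, 1.0, 1.0]).unwrap();
/// let b = Array1::from_vec(vec![1.0, 2.0, 3.0]);
/// let x = solve_least_squares(&a, &b).unwrap();
/// // x minimizes ||A x - b||; for this consistent system x is exactly [1.0, 2.0]
/// ```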
#[allow(dead_code)]
pub fn solve_least_squares(a: &Array2<f64>, b: &Array1<f64>) -> Option<Array1<f64>> {
    let (m, n) = (a.nrows(), a.ncols());
    if m < n {
        return None; // Underdetermined system
    }

    // Simple QR decomposition using Householder reflections
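    // Each reflector has the form H = I - 2 * v * v^T with ||v|| = 1; applied on the
    // left it zeroes the entries of the current column below the diagonal, and Q is
    // accumulated by applying the same reflectors on the right of the identity.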
    let mut q: Array2<f64> = Array2::eye(m);
    let mut r = a.clone();

    for k in 0..n {
        // Householder transformation to introduce zeros below the diagonal
        let mut x = Array1::zeros(m - k);
        for i in k..m {
            x[i - k] = r[[i, k]];
        }

        let x_norm = x.iter().map(|&xi| xi.powi(2)).sum::<f64>().sqrt();
        if x_norm < 1e-10 {
            continue; // Skip if column is already zeroed
        }

        // Compute Householder vector
        let mut v = x.clone();
        v[0] += if x[0] >= 0.0 { x_norm } else { -x_norm };

        let v_norm = v.iter().map(|&vi| vi.powi(2)).sum::<f64>().sqrt();
        if v_norm < 1e-10 {
            continue;
        }

        // Normalize v
        for i in 0..v.len() {
            v[i] /= v_norm;
        }

        // Apply Householder transformation to R
        for j in k..n {
            let mut dot_product = 0.0;
            for i in 0..v.len() {
                dot_product += v[i] * r[[i + k, j]];
            }

            let factor = 2.0 * dot_product;
            for i in 0..v.len() {
                r[[i + k, j]] -= factor * v[i];
            }
        }

        // Apply Householder transformation to Q
        for j in 0..m {
            let mut dot_product = 0.0;
            for i in 0..v.len() {
                dot_product += v[i] * q[[j, i + k]];
            }

            let factor = 2.0 * dot_product;
            for i in 0..v.len() {
                q[[j, i + k]] -= factor * v[i];
            }
        }
    }

    // Now we have Q and R such that A = QR (the loop above accumulates Q, not Q^T).
    // To solve the least squares problem we compute x = R^-1 (Q^T b).

    // Apply Q^T to b: y = Q^T b
    let mut y = Array1::zeros(m);
    for i in 0..m {
        for j in 0..m {
            y[i] += q[[j, i]] * b[j];
        }
    }

    // Back-substitution to solve Rx = y
    let mut x = Array1::zeros(n);

    for i in (0..n).rev() {
        let mut sum: f64 = y[i];
        for j in (i + 1)..n {
            sum -= r[[i, j]] * x[j];
        }

        if r[[i, i]].abs() < 1e-10 {
            return None; // Singular matrix
        }

        x[i] = sum / r[[i, i]];
    }

    Some(x)
}