scirs2_optimize/unconstrained/sparse_optimization.rs

//! Sparse optimization algorithms for high-dimensional problems
//!
//! This module provides optimization algorithms that leverage sparse numerical
//! differentiation and matrix operations for efficient handling of high-dimensional
//! optimization problems.
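//!
//! # Example
//!
//! A minimal usage sketch (item paths and the exact shape of `Options` are assumed
//! here, so the block is not compiled as a doctest):
//!
//! ```ignore
//! use ndarray::{Array1, ArrayView1};
//!
//! // Separable objective f(x) = sum(x_i^2); each gradient entry depends on one variable only.
//! let objective = |x: &ArrayView1<f64>| -> f64 { x.mapv(|xi| xi * xi).sum() };
//!
//! let x0 = Array1::ones(200);
//! let options = SparseOptimizationOptions::default();
//! let result = minimize_sparse_bfgs(objective, x0, &options).unwrap();
//! assert!(result.success);
//! ```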

use crate::error::OptimizeError;
use crate::sparse_numdiff::{sparse_jacobian, SparseFiniteDiffOptions};
use crate::unconstrained::line_search::backtracking_line_search;
use crate::unconstrained::result::OptimizeResult;
use crate::unconstrained::utils::check_convergence;
use crate::unconstrained::Options;
use ndarray::{Array1, Array2, ArrayView1, Axis};
use rand::Rng;
use scirs2_sparse::{csr_array::CsrArray, sparray::SparseArray};

/// Options for sparse optimization algorithms
#[derive(Debug, Clone)]
pub struct SparseOptimizationOptions {
    /// Basic optimization options
    pub base_options: Options,
    /// Sparse finite difference options
    pub sparse_options: SparseFiniteDiffOptions,
    /// Known sparsity pattern for gradient/Jacobian (if None, auto-detected)
    pub sparsity_pattern: Option<CsrArray<f64>>,
    /// Whether to use sparse matrix operations throughout
    pub use_sparse_operations: bool,
    /// Problem-size threshold (number of variables) above which sparse operations are used
    pub sparse_threshold: usize,
}

impl Default for SparseOptimizationOptions {
    fn default() -> Self {
        Self {
            base_options: Options::default(),
            sparse_options: SparseFiniteDiffOptions::default(),
            sparsity_pattern: None,
            use_sparse_operations: true,
            sparse_threshold: 100, // Use sparse operations for 100+ variables
        }
    }
}

/// Sparse BFGS algorithm for large-scale optimization
#[allow(dead_code)]
pub fn minimize_sparse_bfgs<F, S>(
    fun: F,
    x0: Array1<f64>,
    options: &SparseOptimizationOptions,
) -> Result<OptimizeResult<S>, OptimizeError>
where
    F: Fn(&ArrayView1<f64>) -> S + Clone + Sync,
    S: Into<f64> + Clone,
{
    let n = x0.len();
    let base_opts = &options.base_options;
    let sparse_opts = &options.sparse_options;

    // Decide whether to use sparse operations based on problem size
    let use_sparse = options.use_sparse_operations && n >= options.sparse_threshold;

    // Initialize variables
    let mut x = x0.to_owned();
    let bounds = base_opts.bounds.as_ref();

    // Ensure initial point is within bounds
    if let Some(bounds) = bounds {
        bounds.project(x.as_slice_mut().unwrap());
    }

    let mut f: f64 = fun(&x.view()).into();

    // Calculate initial gradient
    let mut g = if use_sparse && options.sparsity_pattern.is_some() {
        // Use sparse gradient computation
        compute_sparse_gradient(&fun, &x.view(), sparse_opts)?
    } else {
        // Use dense gradient computation - create a mutable wrapper
        let mut fun_wrapper = |x_val: &ArrayView1<f64>| fun(x_val);
        crate::unconstrained::utils::finite_difference_gradient(
            &mut fun_wrapper,
            &x.view(),
            base_opts.eps,
        )?
    };

    // Initialize inverse Hessian approximation
    let mut h_inv = if use_sparse {
        // For the sparse case, use an L-BFGS-style limited-memory approach
        // instead of storing a dense approximation of the inverse Hessian
        None
    } else {
        Some(Array2::eye(n))
    };

    // For sparse case, use L-BFGS-like storage
    let mut s_history: Vec<Array1<f64>> = Vec::new();
    let mut y_history: Vec<Array1<f64>> = Vec::new();
    let max_history = 10; // L-BFGS memory

    // Initialize counters
    let mut iter = 0;
    let mut nfev = 1; // Initial function evaluation

    // Main loop
    while iter < base_opts.max_iter {
        // Check convergence on the gradient (L1 norm)
        if g.mapv(|gi| gi.abs()).sum() < base_opts.gtol {
            break;
        }

        // Compute search direction
        let mut p = if use_sparse && !s_history.is_empty() {
            // Use L-BFGS two-loop recursion for sparse case
            compute_lbfgs_direction(&g, &s_history, &y_history)
        } else if let Some(ref h) = h_inv {
            // Use dense BFGS
            -h.dot(&g)
        } else {
            // Fallback: steepest descent
            -&g
        };

        // Project search direction for bounded optimization
        if let Some(bounds) = bounds {
            for i in 0..n {
                let mut can_decrease = true;
                let mut can_increase = true;

                // Check if at boundary
                if let Some(lb) = bounds.lower[i] {
                    if x[i] <= lb + base_opts.eps {
                        can_decrease = false;
                    }
                }
                if let Some(ub) = bounds.upper[i] {
                    if x[i] >= ub - base_opts.eps {
                        can_increase = false;
                    }
                }

                // Freeze direction components for variables at an active bound
                // (where the negative gradient points out of the feasible region)
                if (g[i] > 0.0 && !can_decrease) || (g[i] < 0.0 && !can_increase) {
                    p[i] = 0.0;
                }
            }

            // If no movement is possible, we're at a constrained optimum
            if p.mapv(|pi| pi.abs()).sum() < 1e-10 {
                break;
            }
        }

        // Line search
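        // Backtracking line search along p. The trailing constants passed below are
        // presumably the Armijo sufficient-decrease parameter (1e-4) and the step
        // shrink factor (0.5); see `backtracking_line_search` for the exact meaning.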
        let alpha_init = 1.0;
        let mut fun_wrapper = |x_val: &ArrayView1<f64>| fun(x_val);
        let (alpha, f_new) = backtracking_line_search(
            &mut fun_wrapper,
            &x.view(),
            f,
            &p.view(),
            &g.view(),
            alpha_init,
            0.0001,
            0.5,
            bounds,
        );

        nfev += 1;

        // Update position
        let s = alpha * &p;
        let x_new = &x + &s;

        // Check step size convergence
        if s.mapv(|si| si.abs()).sum() < base_opts.xtol {
            x = x_new;
            break;
        }

        // Calculate new gradient
        let g_new = if use_sparse && options.sparsity_pattern.is_some() {
            compute_sparse_gradient(&fun, &x_new.view(), sparse_opts)?
        } else {
            let mut fun_wrapper = |x_val: &ArrayView1<f64>| fun(x_val);
            crate::unconstrained::utils::finite_difference_gradient(
                &mut fun_wrapper,
                &x_new.view(),
                base_opts.eps,
            )?
        };

        nfev += n; // Approximate function evaluations for gradient

        // Gradient difference
        let y = &g_new - &g;

        // Check convergence on function value
        if check_convergence(
            f - f_new,
            0.0,
            g_new.mapv(|gi| gi.abs()).sum(),
            base_opts.ftol,
            0.0,
            base_opts.gtol,
        ) {
            x = x_new;
            g = g_new;
            break;
        }

        // Update approximation
        let s_dot_y = s.dot(&y);
        if s_dot_y > 1e-10 {
            if use_sparse {
                // Update L-BFGS history
                s_history.push(s.clone());
                y_history.push(y.clone());

                // Keep only recent history
                if s_history.len() > max_history {
                    s_history.remove(0);
                    y_history.remove(0);
                }
            } else if let Some(ref mut h) = h_inv {
                // Dense BFGS update
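                // Standard BFGS inverse-Hessian update, with rho = 1 / (s^T y):
                //   H <- (I - rho * s * y^T) * H * (I - rho * y * s^T) + rho * s * s^T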
                let rho = 1.0 / s_dot_y;
                let i_mat = Array2::eye(n);

                let s_col = s.clone().insert_axis(Axis(1));
                let y_row = y.clone().insert_axis(Axis(0));
                let s_y_t = s_col.dot(&y_row);
                let term1 = &i_mat - &(&s_y_t * rho);

                let y_col = y.clone().insert_axis(Axis(1));
                let s_row = s.clone().insert_axis(Axis(0));
                let y_s_t = y_col.dot(&s_row);
                let term2 = &i_mat - &(&y_s_t * rho);

                let term3 = term1.dot(h);
                *h = term3.dot(&term2) + rho * s_col.dot(&s_row);
            }
        }

        // Update variables for next iteration
        x = x_new;
        f = f_new;
        g = g_new;
        iter += 1;
    }

    // Final check for bounds
    if let Some(bounds) = bounds {
        bounds.project(x.as_slice_mut().unwrap());
    }

    // Use original function for final value
    let final_fun = fun(&x.view());

    Ok(OptimizeResult {
        x,
        fun: final_fun,
        nit: iter,
        func_evals: nfev,
        nfev,
        success: iter < base_opts.max_iter,
        message: if iter < base_opts.max_iter {
            "Sparse optimization terminated successfully.".to_string()
        } else {
            "Maximum iterations reached.".to_string()
        },
        jacobian: Some(g),
        hessian: None,
    })
}

/// Compute sparse gradient using sparse numerical differentiation
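///
/// The scalar objective is wrapped as a one-element vector function, so the gradient
/// is recovered as the single row of a 1 x n sparse Jacobian.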
#[allow(dead_code)]
pub fn compute_sparse_gradient<F, S>(
    fun: &F,
    x: &ArrayView1<f64>,
    options: &SparseFiniteDiffOptions,
) -> Result<Array1<f64>, OptimizeError>
where
    F: Fn(&ArrayView1<f64>) -> S + Clone + Sync,
    S: Into<f64> + Clone,
{
    // Create a wrapper that returns Array1<f64> instead of a scalar
    let wrapper_fn = |x_val: &ArrayView1<f64>| -> Array1<f64> {
        let result: f64 = fun(x_val).into();
        Array1::from(vec![result])
    };

    // Create a dense 1 x n sparsity pattern (gradient of a scalar function)
    let rows = vec![0; x.len()];
    let cols: Vec<_> = (0..x.len()).collect();
    let data = vec![1.0; x.len()];

    let sparsity =
        CsrArray::from_triplets(&rows, &cols, &data, (1, x.len()), false).map_err(|e| {
            OptimizeError::ValueError(format!("Failed to create sparsity pattern: {}", e))
        })?;

    // Compute the sparse Jacobian (which gives us the gradient)
    let jacobian = sparse_jacobian(wrapper_fn, x, None, Some(&sparsity), Some(options.clone()))?;

    // Extract the gradient (first and only row of the Jacobian)
    let grad_array = jacobian.to_array();
    Ok(grad_array.row(0).to_owned())
}

/// Compute the gradient of a scalar function by forward finite differences
/// (used by the sparsity auto-detection below)
#[allow(dead_code)]
fn finite_difference_gradient_sparse<F, S>(
    fun: &mut F,
    x: &ArrayView1<f64>,
    options: &SparseFiniteDiffOptions,
) -> Result<Array1<f64>, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> S,
    S: Into<f64>,
{
    let n = x.len();
    let mut grad = Array1::<f64>::zeros(n);
    let step = options.rel_step.unwrap_or(1e-8);

    // Compute f0
    let f0: f64 = fun(x).into();

    // Use finite differences to compute the gradient
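    // Forward difference: grad[i] ≈ (f(x + h * e_i) - f(x)) / h with h = step * (1 + |x[i]|)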
    let mut x_perturbed = x.to_owned();
    for i in 0..n {
        let h = step * (1.0 + x[i].abs());
        x_perturbed[i] = x[i] + h;

        let f_plus: f64 = fun(&x_perturbed.view()).into();

        if !f_plus.is_finite() {
            return Err(OptimizeError::ComputationError(
                "Function returned non-finite value during gradient computation".to_string(),
            ));
        }

        grad[i] = (f_plus - f0) / h;
        x_perturbed[i] = x[i]; // Reset
    }

    Ok(grad)
}

/// Compute L-BFGS search direction using two-loop recursion
#[allow(dead_code)]
fn compute_lbfgs_direction(
    g: &Array1<f64>,
    s_history: &[Array1<f64>],
    y_history: &[Array1<f64>],
) -> Array1<f64> {
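    // Two-loop recursion: a backward pass computes alpha_i = rho_i * s_i^T q and
    // q <- q - alpha_i * y_i; the result is scaled by gamma = (s_k^T y_k) / (y_k^T y_k)
    // as the initial inverse-Hessian guess; a forward pass then applies
    // r <- r + (alpha_i - beta_i) * s_i with beta_i = rho_i * y_i^T r.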
    let m = s_history.len();
    if m == 0 {
        return -g; // Steepest descent if no history
    }

    let mut q = g.clone();
    let mut alpha = vec![0.0; m];

    // First loop: backward through the history
    for i in (0..m).rev() {
        let rho_i = 1.0 / y_history[i].dot(&s_history[i]);
        alpha[i] = rho_i * s_history[i].dot(&q);
        q = &q - alpha[i] * &y_history[i];
    }

    // Apply the initial Hessian approximation (simple scaling);
    // m > 0 is guaranteed by the early return above
    let gamma =
        s_history[m - 1].dot(&y_history[m - 1]) / y_history[m - 1].dot(&y_history[m - 1]);
    let mut r = gamma * q;

    // Second loop: forward through the history
    for i in 0..m {
        let rho_i = 1.0 / y_history[i].dot(&s_history[i]);
        let beta = rho_i * y_history[i].dot(&r);
        r = &r + (alpha[i] - beta) * &s_history[i];
    }

    -r // Search direction
}

/// Auto-detect sparsity pattern by evaluating the function at multiple points
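///
/// This is a sampling heuristic: the gradient is estimated by finite differences at a
/// few randomly perturbed points near `x`, and any component whose magnitude exceeds
/// `threshold` at some sample is marked as structurally nonzero. A rough usage sketch
/// (reusing `objective` and `x0` from the module-level example; not compiled as a doctest):
///
/// ```ignore
/// let pattern = auto_detect_sparsity(objective, &x0.view(), None, None).unwrap();
/// let options = SparseOptimizationOptions {
///     sparsity_pattern: Some(pattern),
///     ..Default::default()
/// };
/// ```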
#[allow(dead_code)]
pub fn auto_detect_sparsity<F, S>(
    fun: F,
    x: &ArrayView1<f64>,
    num_samples: Option<usize>,
    threshold: Option<f64>,
) -> Result<CsrArray<f64>, OptimizeError>
where
    F: Fn(&ArrayView1<f64>) -> S + Clone,
    S: Into<f64> + Clone,
{
    let n = x.len();
    let num_samples = num_samples.unwrap_or(std::cmp::min(n, 10));
    let threshold = threshold.unwrap_or(1e-10);
    let mut rng = rand::rng();

    // Sample points around x
    let mut sparsity_pattern = Array2::<f64>::zeros((1, n));

    for _sample in 0..num_samples {
        // Create a small random perturbation
        let mut x_pert = x.to_owned();
        for i in 0..n {
            x_pert[i] += 1e-6 * rng.random_range(-0.5..0.5);
        }

        // Compute gradient at this point
        let options = SparseFiniteDiffOptions::default();
        if let Ok(grad) =
            finite_difference_gradient_sparse(&mut fun.clone(), &x_pert.view(), &options)
        {
            for i in 0..n {
                if grad[i].abs() > threshold {
                    sparsity_pattern[[0, i]] = 1.0;
                }
            }
        }
    }

    // Convert to sparse format
    let mut rows = Vec::new();
    let mut cols = Vec::new();
    let mut data = Vec::new();

    for j in 0..n {
        if sparsity_pattern[[0, j]] > 0.0 {
            rows.push(0);
            cols.push(j);
            data.push(1.0);
        }
    }

    if rows.is_empty() {
        // If no sparsity detected, assume dense
        for j in 0..n {
            rows.push(0);
            cols.push(j);
            data.push(1.0);
        }
    }

    CsrArray::from_triplets(&rows, &cols, &data, (1, n), false)
        .map_err(|e| OptimizeError::ValueError(format!("Failed to create sparsity pattern: {}", e)))
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    #[test]
    fn test_sparse_bfgs_quadratic() {
        let quadratic = |x: &ArrayView1<f64>| -> f64 {
            // Simple quadratic: f(x) = sum(x_i^2)
            x.mapv(|xi| xi.powi(2)).sum()
        };

        let n = 50; // Medium-sized problem
        let x0 = Array1::ones(n);
        let options = SparseOptimizationOptions {
            sparse_threshold: 10, // Force sparse operations
            ..Default::default()
        };

        let result = minimize_sparse_bfgs(quadratic, x0, &options).unwrap();

        assert!(result.success);
        // Should converge to origin
        for i in 0..n {
            assert_abs_diff_eq!(result.x[i], 0.0, epsilon = 1e-5);
        }
    }

    #[test]
    fn test_auto_detect_sparsity() {
        let sparse_fn = |x: &ArrayView1<f64>| -> f64 {
            // Only depends on first two variables
            x[0].powi(2) + x[1].powi(2)
        };

        let x = Array1::zeros(10);
        let sparsity = auto_detect_sparsity(sparse_fn, &x.view(), Some(5), Some(1e-8)).unwrap();

        // Should detect that only first two variables matter
        let dense = sparsity.to_array();
        assert!(dense[[0, 0]] > 0.0);
        assert!(dense[[0, 1]] > 0.0);
        // Rest should be zero (or close to zero)
        for i in 2..10 {
            assert!(dense[[0, i]].abs() < 1e-8);
        }
    }
}