scirs2_optimize/unconstrained/
sparse_optimization.rs

//! Sparse optimization algorithms for high-dimensional problems
//!
//! This module provides optimization algorithms that leverage sparse numerical
//! differentiation and matrix operations for efficient handling of high-dimensional
//! optimization problems.
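//!
//! The main entry point is [`minimize_sparse_bfgs`]; a known sparsity pattern
//! can be supplied through [`SparseOptimizationOptions`], or detected with
//! [`auto_detect_sparsity`].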

use crate::error::OptimizeError;
use crate::sparse_numdiff::{sparse_jacobian, SparseFiniteDiffOptions};
use crate::unconstrained::line_search::backtracking_line_search;
use crate::unconstrained::result::OptimizeResult;
use crate::unconstrained::utils::check_convergence;
use crate::unconstrained::Options;
use ndarray::{Array1, Array2, ArrayView1, Axis};
use scirs2_sparse::{csr_array::CsrArray, sparray::SparseArray};

/// Options for sparse optimization algorithms
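///
/// # Example
///
/// A minimal configuration sketch (the field values are illustrative, not tuned
/// recommendations; marked `ignore` because the import path depends on the
/// crate's re-exports):
///
/// ```ignore
/// let options = SparseOptimizationOptions {
///     sparse_threshold: 500, // only switch to sparse operations at 500+ variables
///     ..Default::default()
/// };
/// assert!(options.use_sparse_operations);
/// ```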
#[derive(Debug, Clone)]
pub struct SparseOptimizationOptions {
    /// Basic optimization options
    pub base_options: Options,
    /// Sparse finite difference options
    pub sparse_options: SparseFiniteDiffOptions,
    /// Known sparsity pattern for the gradient/Jacobian (if `None`, auto-detected)
    pub sparsity_pattern: Option<CsrArray<f64>>,
    /// Whether to use sparse matrix operations throughout
    pub use_sparse_operations: bool,
    /// Problem size (number of variables) at which sparse operations are used
    pub sparse_threshold: usize,
}

impl Default for SparseOptimizationOptions {
    fn default() -> Self {
        Self {
            base_options: Options::default(),
            sparse_options: SparseFiniteDiffOptions::default(),
            sparsity_pattern: None,
            use_sparse_operations: true,
            sparse_threshold: 100, // Use sparse operations for 100+ variables
        }
    }
}

/// Sparse BFGS algorithm for large-scale optimization
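///
/// # Example
///
/// A minimal usage sketch mirroring the unit test at the bottom of this file
/// (marked `ignore` because the import paths depend on the crate's re-exports):
///
/// ```ignore
/// use ndarray::{Array1, ArrayView1};
///
/// // f(x) = sum(x_i^2), minimized at the origin
/// let quadratic = |x: &ArrayView1<f64>| -> f64 { x.mapv(|xi| xi.powi(2)).sum() };
/// let x0 = Array1::ones(50);
/// let options = SparseOptimizationOptions::default();
/// let result = minimize_sparse_bfgs(quadratic, x0, &options).unwrap();
/// assert!(result.success);
/// ```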
pub fn minimize_sparse_bfgs<F, S>(
    fun: F,
    x0: Array1<f64>,
    options: &SparseOptimizationOptions,
) -> Result<OptimizeResult<S>, OptimizeError>
where
    F: Fn(&ArrayView1<f64>) -> S + Clone + Sync,
    S: Into<f64> + Clone,
{
    let n = x0.len();
    let base_opts = &options.base_options;
    let sparse_opts = &options.sparse_options;

    // Decide whether to use sparse operations based on problem size
    let use_sparse = options.use_sparse_operations && n >= options.sparse_threshold;

    // Initialize variables
    let mut x = x0;
    let bounds = base_opts.bounds.as_ref();

    // Ensure the initial point is within bounds
    if let Some(bounds) = bounds {
        bounds.project(x.as_slice_mut().unwrap());
    }

    let mut f: f64 = fun(&x.view()).into();

    // Calculate the initial gradient
    let mut g = if use_sparse && options.sparsity_pattern.is_some() {
        // Use sparse gradient computation
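        // Note: the supplied pattern currently only gates this branch;
        // compute_sparse_gradient builds its own dense 1 x n pattern internally.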
        compute_sparse_gradient(&fun, &x.view(), sparse_opts)?
    } else {
        // Use dense gradient computation - create a mutable wrapper
        let mut fun_wrapper = |x_val: &ArrayView1<f64>| fun(x_val);
        crate::unconstrained::utils::finite_difference_gradient(
            &mut fun_wrapper,
            &x.view(),
            base_opts.eps,
        )?
    };

    // Initialize the inverse Hessian approximation
    let mut h_inv = if use_sparse {
        // For the sparse case, skip the dense matrix and rely on the
        // L-BFGS-style limited-memory history below
        None
    } else {
        Some(Array2::eye(n))
    };

    // For the sparse case, use L-BFGS-like storage
    let mut s_history: Vec<Array1<f64>> = Vec::new();
    let mut y_history: Vec<Array1<f64>> = Vec::new();
    let max_history = 10; // L-BFGS memory

    // Initialize counters
    let mut iter = 0;
    let mut nfev = 1; // Initial function evaluation

    // Main loop
    while iter < base_opts.max_iter {
        // Check convergence on the gradient (L1 norm)
        if g.mapv(|gi| gi.abs()).sum() < base_opts.gtol {
            break;
        }

        // Compute the search direction
        let mut p = if use_sparse && !s_history.is_empty() {
            // Use the L-BFGS two-loop recursion for the sparse case
            compute_lbfgs_direction(&g, &s_history, &y_history)
        } else if let Some(ref h) = h_inv {
            // Use dense BFGS
            -h.dot(&g)
        } else {
            // Fallback: steepest descent
            -&g
        };

        // Project the search direction for bounded optimization
        if let Some(bounds) = bounds {
            for i in 0..n {
                let mut can_decrease = true;
                let mut can_increase = true;

                // Check whether x[i] sits at a boundary
                if let Some(lb) = bounds.lower[i] {
                    if x[i] <= lb + base_opts.eps {
                        can_decrease = false;
                    }
                }
                if let Some(ub) = bounds.upper[i] {
                    if x[i] >= ub - base_opts.eps {
                        can_increase = false;
                    }
                }

                // Zero the component when the gradient pushes into an active bound
                if (g[i] > 0.0 && !can_decrease) || (g[i] < 0.0 && !can_increase) {
                    p[i] = 0.0;
                }
            }

            // If no movement is possible, we're at a constrained optimum
            if p.mapv(|pi| pi.abs()).sum() < 1e-10 {
                break;
            }
        }

        // Line search
        let alpha_init = 1.0;
        let mut fun_wrapper = |x_val: &ArrayView1<f64>| fun(x_val);
        let (alpha, f_new) = backtracking_line_search(
            &mut fun_wrapper,
            &x.view(),
            f,
            &p.view(),
            &g.view(),
            alpha_init,
            0.0001,
            0.5,
            bounds,
        );

        nfev += 1; // Lower bound: backtracking may evaluate more than once

        // Update the position
        let s = alpha * &p;
        let x_new = &x + &s;

        // Check step-size convergence
        if s.mapv(|si| si.abs()).sum() < base_opts.xtol {
            x = x_new;
            break;
        }

        // Calculate the new gradient
        let g_new = if use_sparse && options.sparsity_pattern.is_some() {
            compute_sparse_gradient(&fun, &x_new.view(), sparse_opts)?
        } else {
            let mut fun_wrapper = |x_val: &ArrayView1<f64>| fun(x_val);
            crate::unconstrained::utils::finite_difference_gradient(
                &mut fun_wrapper,
                &x_new.view(),
                base_opts.eps,
            )?
        };

        nfev += n; // Approximate function evaluations for the gradient

        // Gradient difference
        let y = &g_new - &g;

        // Check convergence on the function value
        if check_convergence(
            f - f_new,
            0.0,
            g_new.mapv(|gi| gi.abs()).sum(),
            base_opts.ftol,
            0.0,
            base_opts.gtol,
        ) {
            x = x_new;
            g = g_new;
            break;
        }

        // Update the quasi-Newton approximation; skip it when the curvature
        // condition s^T y > 0 fails, which would destroy positive definiteness
        let s_dot_y = s.dot(&y);
        if s_dot_y > 1e-10 {
            if use_sparse {
                // Update the L-BFGS history
                s_history.push(s.clone());
                y_history.push(y.clone());

                // Keep only recent history
                if s_history.len() > max_history {
                    s_history.remove(0);
                    y_history.remove(0);
                }
            } else if let Some(ref mut h) = h_inv {
                // Dense BFGS inverse-Hessian update:
                //   H_new = (I - rho * s * y^T) * H * (I - rho * y * s^T) + rho * s * s^T
                // which preserves the secant condition H_new * y = s
                let rho = 1.0 / s_dot_y;
                let i_mat = Array2::eye(n);

                let s_col = s.clone().insert_axis(Axis(1));
                let y_row = y.clone().insert_axis(Axis(0));
                let s_y_t = s_col.dot(&y_row);
                let term1 = &i_mat - &(&s_y_t * rho);

                let y_col = y.clone().insert_axis(Axis(1));
                let s_row = s.clone().insert_axis(Axis(0));
                let y_s_t = y_col.dot(&s_row);
                let term2 = &i_mat - &(&y_s_t * rho);

                *h = term1.dot(h).dot(&term2) + rho * s_col.dot(&s_row);
            }
        }

        // Update variables for the next iteration
        x = x_new;
        f = f_new;
        g = g_new;
        iter += 1;
    }

    // Final projection onto the bounds
    if let Some(bounds) = bounds {
        bounds.project(x.as_slice_mut().unwrap());
    }

    // Use the original function for the final value
    let final_fun = fun(&x.view());

    Ok(OptimizeResult {
        x,
        fun: final_fun,
        iterations: iter,
        nit: iter,
        func_evals: nfev,
        nfev,
        success: iter < base_opts.max_iter,
        message: if iter < base_opts.max_iter {
            "Sparse optimization terminated successfully.".to_string()
        } else {
            "Maximum iterations reached.".to_string()
        },
        jacobian: Some(g),
        hessian: None,
    })
}

/// Compute a sparse gradient using sparse numerical differentiation
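///
/// A minimal sketch (marked `ignore` because the import paths depend on the
/// crate's re-exports): the gradient of `f(x) = x . x` is `2x`.
///
/// ```ignore
/// use ndarray::{array, ArrayView1};
///
/// let f = |x: &ArrayView1<f64>| -> f64 { x.dot(x) };
/// let x = array![1.0, 2.0];
/// let opts = SparseFiniteDiffOptions::default();
/// let g = compute_sparse_gradient(&f, &x.view(), &opts).unwrap();
/// // g is approximately [2.0, 4.0]
/// ```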
pub fn compute_sparse_gradient<F, S>(
    fun: &F,
    x: &ArrayView1<f64>,
    options: &SparseFiniteDiffOptions,
) -> Result<Array1<f64>, OptimizeError>
where
    F: Fn(&ArrayView1<f64>) -> S + Clone + Sync,
    S: Into<f64> + Clone,
{
    // Create a wrapper that returns Array1<f64> instead of a scalar
    let wrapper_fn = |x_val: &ArrayView1<f64>| -> Array1<f64> {
        let result: f64 = fun(x_val).into();
        Array1::from(vec![result])
    };

    // Create a dense 1 x n sparsity pattern: the gradient of a scalar
    // function is a single row that may involve every variable
    let rows = vec![0; x.len()];
    let cols: Vec<usize> = (0..x.len()).collect();
    let data = vec![1.0; x.len()];

    let sparsity =
        CsrArray::from_triplets(&rows, &cols, &data, (1, x.len()), false).map_err(|e| {
            OptimizeError::ValueError(format!("Failed to create sparsity pattern: {}", e))
        })?;

    // Compute the sparse Jacobian (which gives us the gradient)
    let jacobian = sparse_jacobian(wrapper_fn, x, None, Some(&sparsity), Some(options.clone()))?;

    // Extract the gradient (the first and only row of the Jacobian)
    let grad_array = jacobian.to_array();
    Ok(grad_array.row(0).to_owned())
}

/// Compute a gradient with plain forward finite differences (dense fallback
/// used for sparsity detection)
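///
/// Uses the forward-difference approximation
/// `grad[i] ≈ (f(x + h_i * e_i) - f(x)) / h_i`, with the relative step
/// `h_i = step * (1 + |x[i]|)`.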
fn finite_difference_gradient_sparse<F, S>(
    fun: &mut F,
    x: &ArrayView1<f64>,
    options: &SparseFiniteDiffOptions,
) -> Result<Array1<f64>, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> S,
    S: Into<f64>,
{
    let n = x.len();
    let mut grad = Array1::<f64>::zeros(n);
    let step = options.rel_step.unwrap_or(1e-8);

    // Baseline function value f(x)
    let f0: f64 = fun(x).into();

    // Forward differences, one coordinate at a time
    let mut x_perturbed = x.to_owned();
    for i in 0..n {
        let h = step * (1.0 + x[i].abs());
        x_perturbed[i] = x[i] + h;

        let f_plus: f64 = fun(&x_perturbed.view()).into();

        if !f_plus.is_finite() {
            return Err(OptimizeError::ComputationError(
                "Function returned non-finite value during gradient computation".to_string(),
            ));
        }

        grad[i] = (f_plus - f0) / h;
        x_perturbed[i] = x[i]; // Reset
    }

    Ok(grad)
}

/// Compute the L-BFGS search direction via the two-loop recursion
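///
/// The backward loop folds the stored `(s_i, y_i)` pairs into `q`; `q` is then
/// scaled by `gamma = s_{k-1}^T y_{k-1} / y_{k-1}^T y_{k-1}` as a cheap initial
/// inverse-Hessian guess, and the forward loop unfolds the pairs again. The
/// result approximates `-H_k * g` without ever forming `H_k` explicitly.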
fn compute_lbfgs_direction(
    g: &Array1<f64>,
    s_history: &[Array1<f64>],
    y_history: &[Array1<f64>],
) -> Array1<f64> {
    let m = s_history.len();
    if m == 0 {
        return -g; // Steepest descent if no history
    }

    let mut q = g.clone();
    let mut alpha = vec![0.0; m];

    // First loop: backward through the history
    for i in (0..m).rev() {
        let rho_i = 1.0 / y_history[i].dot(&s_history[i]);
        alpha[i] = rho_i * s_history[i].dot(&q);
        q = &q - alpha[i] * &y_history[i];
    }

    // Apply the initial inverse-Hessian approximation (simple scaling);
    // m > 0 is guaranteed by the early return above
    let gamma = s_history[m - 1].dot(&y_history[m - 1]) / y_history[m - 1].dot(&y_history[m - 1]);
    let mut r = gamma * q;

    // Second loop: forward through the history
    for i in 0..m {
        let rho_i = 1.0 / y_history[i].dot(&s_history[i]);
        let beta = rho_i * y_history[i].dot(&r);
        r = &r + (alpha[i] - beta) * &s_history[i];
    }

    -r // Search direction
}

/// Auto-detect a sparsity pattern by sampling finite-difference gradients at
/// randomly perturbed points around `x`
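///
/// # Example
///
/// A sketch of detecting which variables a function actually depends on,
/// mirroring the unit test at the bottom of this file (marked `ignore` because
/// the import paths depend on the crate's re-exports):
///
/// ```ignore
/// use ndarray::{Array1, ArrayView1};
///
/// // Depends only on the first two variables
/// let f = |x: &ArrayView1<f64>| -> f64 { x[0].powi(2) + x[1].powi(2) };
/// let x = Array1::zeros(10);
/// let pattern = auto_detect_sparsity(f, &x.view(), Some(5), Some(1e-8)).unwrap();
/// // Only columns 0 and 1 should be present in the 1 x 10 pattern
/// ```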
pub fn auto_detect_sparsity<F, S>(
    fun: F,
    x: &ArrayView1<f64>,
    num_samples: Option<usize>,
    threshold: Option<f64>,
) -> Result<CsrArray<f64>, OptimizeError>
where
    F: Fn(&ArrayView1<f64>) -> S + Clone,
    S: Into<f64> + Clone,
{
    let n = x.len();
    let num_samples = num_samples.unwrap_or(std::cmp::min(n, 10));
    let threshold = threshold.unwrap_or(1e-10);

    // Sample points around x
    let mut sparsity_pattern = Array2::<f64>::zeros((1, n));

    for _sample in 0..num_samples {
        // Create a small random perturbation
        let mut x_pert = x.to_owned();
        for i in 0..n {
            x_pert[i] += 1e-6 * (rand::random::<f64>() - 0.5);
        }

        // Compute the gradient at this point
        let options = SparseFiniteDiffOptions::default();
        let mut fun_clone = fun.clone();
        if let Ok(grad) =
            finite_difference_gradient_sparse(&mut fun_clone, &x_pert.view(), &options)
        {
            for i in 0..n {
                if grad[i].abs() > threshold {
                    sparsity_pattern[[0, i]] = 1.0;
                }
            }
        }
    }

    // Convert to sparse format
    let mut rows = Vec::new();
    let mut cols = Vec::new();
    let mut data = Vec::new();

    for j in 0..n {
        if sparsity_pattern[[0, j]] > 0.0 {
            rows.push(0);
            cols.push(j);
            data.push(1.0);
        }
    }

    if rows.is_empty() {
        // If no sparsity was detected, assume dense
        for j in 0..n {
            rows.push(0);
            cols.push(j);
            data.push(1.0);
        }
    }

    CsrArray::from_triplets(&rows, &cols, &data, (1, n), false)
        .map_err(|e| OptimizeError::ValueError(format!("Failed to create sparsity pattern: {}", e)))
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    #[test]
    fn test_sparse_bfgs_quadratic() {
        let quadratic = |x: &ArrayView1<f64>| -> f64 {
            // Simple quadratic: f(x) = sum(x_i^2)
            x.mapv(|xi| xi.powi(2)).sum()
        };

        let n = 50; // Medium-sized problem
        let x0 = Array1::ones(n);
        let options = SparseOptimizationOptions {
            sparse_threshold: 10, // Force sparse operations
            ..Default::default()
        };

        let result = minimize_sparse_bfgs(quadratic, x0, &options).unwrap();

        assert!(result.success);
        // Should converge to the origin
        for i in 0..n {
            assert_abs_diff_eq!(result.x[i], 0.0, epsilon = 1e-5);
        }
    }

    #[test]
    fn test_auto_detect_sparsity() {
        let sparse_fn = |x: &ArrayView1<f64>| -> f64 {
            // Depends only on the first two variables
            x[0].powi(2) + x[1].powi(2)
        };

        let x = Array1::zeros(10);
        let sparsity = auto_detect_sparsity(sparse_fn, &x.view(), Some(5), Some(1e-8)).unwrap();

        // Should detect that only the first two variables matter
        let dense = sparsity.to_array();
        assert!(dense[[0, 0]] > 0.0);
        assert!(dense[[0, 1]] > 0.0);
        // The rest should be zero (or close to zero)
        for i in 2..10 {
            assert!(dense[[0, i]].abs() < 1e-8);
        }
    }
}