scirs2_optimize/unconstrained/memory_efficient.rs

//! Memory-efficient algorithms for large-scale optimization problems
//!
//! This module provides optimization algorithms designed to handle very large problems
//! with limited memory by using chunked processing, streaming algorithms, and
//! memory pool management.
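//!
//! # Example
//!
//! A minimal usage sketch. The import path assumes this module is exposed as
//! `scirs2_optimize::unconstrained::memory_efficient`; adjust it to the actual
//! crate layout (the block is marked `ignore` for that reason).
//!
//! ```ignore
//! use ndarray::{Array1, ArrayView1};
//! use scirs2_optimize::unconstrained::memory_efficient::{
//!     create_memory_efficient_optimizer, minimize_memory_efficient_lbfgs,
//! };
//!
//! // f(x) = sum(x_i^2), minimized at the origin.
//! let quadratic = |x: &ArrayView1<f64>| -> f64 { x.mapv(|xi| xi * xi).sum() };
//!
//! let n = 100_000;
//! let x0 = Array1::ones(n);
//!
//! // Choose chunk size and history length for a budget of roughly 64 MB.
//! let options = create_memory_efficient_optimizer(n, 64);
//! let result = minimize_memory_efficient_lbfgs(quadratic, x0, &options).unwrap();
//! assert!(result.success);
//! ```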

use crate::error::OptimizeError;
use crate::unconstrained::line_search::backtracking_line_search;
use crate::unconstrained::result::OptimizeResult;
use crate::unconstrained::utils::check_convergence;
use crate::unconstrained::Options;
use ndarray::{Array1, ArrayView1};
use std::collections::VecDeque;

/// Memory optimization options for large-scale problems
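///
/// Unspecified fields can be taken from `Default`, for example (illustrative
/// values only):
///
/// ```ignore
/// let options = MemoryOptions {
///     chunk_size: 4096, // process 4096 elements per chunk
///     max_history: 5,   // keep only the last 5 (s, y) pairs
///     ..MemoryOptions::default()
/// };
/// ```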
#[derive(Debug, Clone)]
pub struct MemoryOptions {
    /// Base optimization options
    pub base_options: Options,
    /// Maximum memory usage in bytes (0 means unlimited)
    pub max_memory_bytes: usize,
    /// Chunk size for processing large vectors
    pub chunk_size: usize,
    /// Maximum history size for L-BFGS-style methods
    pub max_history: usize,
    /// Whether to use memory pooling
    pub use_memory_pool: bool,
    /// Whether to use out-of-core storage for very large problems
    pub use_out_of_core: bool,
    /// Temporary directory for out-of-core storage
    pub temp_dir: Option<std::path::PathBuf>,
}

impl Default for MemoryOptions {
    fn default() -> Self {
        Self {
            base_options: Options::default(),
            max_memory_bytes: 0, // Unlimited by default
            chunk_size: 1024,    // Process 1024 elements at a time
            max_history: 10,     // Keep last 10 iterations
            use_memory_pool: true,
            use_out_of_core: false,
            temp_dir: None,
        }
    }
}

/// Memory pool for reusing arrays to reduce allocations
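///
/// `get_array` hands out a pooled buffer only when one of exactly the requested
/// length is available; `return_array` zeroes a buffer before pooling it again
/// and silently drops it once the pool already holds `max_pool_size` arrays.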
struct MemoryPool {
    array_pool: VecDeque<Array1<f64>>,
    max_pool_size: usize,
}

impl MemoryPool {
    fn new(max_size: usize) -> Self {
        Self {
            array_pool: VecDeque::new(),
            max_pool_size: max_size,
        }
    }

    fn get_array(&mut self, size: usize) -> Array1<f64> {
        // Try to reuse an existing array of the right size
        for i in 0..self.array_pool.len() {
            if self.array_pool[i].len() == size {
                return self.array_pool.remove(i).unwrap();
            }
        }
        // If no suitable array found, create a new one
        Array1::zeros(size)
    }

    fn return_array(&mut self, mut array: Array1<f64>) {
        if self.array_pool.len() < self.max_pool_size {
            // Zero out the array for reuse
            array.fill(0.0);
            self.array_pool.push_back(array);
        }
        // If pool is full, just drop the array
    }
}

/// Streaming gradient computation for very large problems
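///
/// Builds the gradient one coordinate at a time so only a single perturbed copy
/// of `x` is held in memory; the trade-off is `n + 1` objective evaluations per
/// gradient.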
struct StreamingGradient {
    chunk_size: usize,
    eps: f64,
}

impl StreamingGradient {
    fn new(chunk_size: usize, eps: f64) -> Self {
        Self { chunk_size, eps }
    }

    /// Compute gradient using chunked finite differences
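    ///
    /// Each component uses a forward difference with a relative step,
    /// `h_i = eps * (1 + |x_i|)` and `grad[i] ≈ (f(x + h_i * e_i) - f(x)) / h_i`,
    /// processed `chunk_size` coordinates at a time.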
    fn compute<F, S>(&self, fun: &mut F, x: &ArrayView1<f64>) -> Result<Array1<f64>, OptimizeError>
    where
        F: FnMut(&ArrayView1<f64>) -> S,
        S: Into<f64>,
    {
        let n = x.len();
        let mut grad = Array1::zeros(n);
        let f0 = fun(x).into();

        let mut x_pert = x.to_owned();

        // Process gradient computation in chunks to limit memory usage
        for chunk_start in (0..n).step_by(self.chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + self.chunk_size, n);

            for i in chunk_start..chunk_end {
                let h = self.eps * (1.0 + x[i].abs());
                x_pert[i] = x[i] + h;

                let f_plus = fun(&x_pert.view()).into();

                if !f_plus.is_finite() {
                    return Err(OptimizeError::ComputationError(
                        "Function returned non-finite value during gradient computation"
                            .to_string(),
                    ));
                }

                grad[i] = (f_plus - f0) / h;
                x_pert[i] = x[i]; // Reset
            }

            // Optional: yield control to prevent blocking for too long
            // In a real implementation, this could check for cancellation
        }

        Ok(grad)
    }
}

/// Memory-efficient L-BFGS implementation with bounded history
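///
/// Memory use is dominated by the bounded `(s, y)` history, roughly
/// `2 * max_history * n` doubles (see `estimate_memory_usage`); gradients are
/// obtained by streaming forward differences, which costs about `n + 1`
/// objective evaluations per iteration.
///
/// A sketch over a simple quadratic (imports omitted and the block marked
/// `ignore`, so it is illustrative rather than a doctest):
///
/// ```ignore
/// let quadratic = |x: &ArrayView1<f64>| -> f64 { x.mapv(|xi| xi * xi).sum() };
/// let x0 = Array1::ones(1_000);
/// let options = MemoryOptions {
///     chunk_size: 128,
///     max_history: 5,
///     ..MemoryOptions::default()
/// };
/// let result = minimize_memory_efficient_lbfgs(quadratic, x0, &options).unwrap();
/// assert!(result.success);
/// ```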
pub fn minimize_memory_efficient_lbfgs<F, S>(
    mut fun: F,
    x0: Array1<f64>,
    options: &MemoryOptions,
) -> Result<OptimizeResult<S>, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> S + Clone,
    S: Into<f64> + Clone,
{
    let n = x0.len();
    let base_opts = &options.base_options;

    // Initialize memory pool if enabled
    let mut memory_pool = if options.use_memory_pool {
        Some(MemoryPool::new(options.max_history * 2))
    } else {
        None
    };

    // Estimate memory usage
    let estimated_memory = estimate_memory_usage(n, options.max_history);
    if options.max_memory_bytes > 0 && estimated_memory > options.max_memory_bytes {
        return Err(OptimizeError::ValueError(format!(
            "Estimated memory usage ({} bytes) exceeds limit ({} bytes). Consider reducing max_history or chunk_size.",
            estimated_memory, options.max_memory_bytes
        )));
    }

    // Initialize variables
    let mut x = x0.to_owned();
    let bounds = base_opts.bounds.as_ref();

    // Ensure initial point is within bounds
    if let Some(bounds) = bounds {
        bounds.project(x.as_slice_mut().unwrap());
    }

    let mut f = fun(&x.view()).into();

    // Initialize streaming gradient computer
    let streaming_grad = StreamingGradient::new(options.chunk_size, base_opts.eps);

    // Calculate initial gradient
    let mut g = streaming_grad.compute(&mut fun, &x.view())?;

    // L-BFGS history with bounded size
    let mut s_history: VecDeque<Array1<f64>> = VecDeque::new();
    let mut y_history: VecDeque<Array1<f64>> = VecDeque::new();

    // Initialize counters
    let mut iter = 0;
    let mut nfev = 1 + (n + 1); // f(x0) plus the n + 1 evaluations of the initial streaming gradient

    // Main loop
    while iter < base_opts.max_iter {
        // Check convergence on gradient
        if g.mapv(|gi| gi.abs()).sum() < base_opts.gtol {
            break;
        }

        // Compute search direction using L-BFGS two-loop recursion
        let mut p = if s_history.is_empty() {
            // Use steepest descent if no history
            get_array_from_pool(&mut memory_pool, n, |_size| -&g)
        } else {
            compute_lbfgs_direction_memory_efficient(&g, &s_history, &y_history, &mut memory_pool)?
        };

        // Project search direction for bounded optimization
        if let Some(bounds) = bounds {
            for i in 0..n {
                let mut can_decrease = true;
                let mut can_increase = true;

                // Check if at boundary
                if let Some(lb) = bounds.lower[i] {
                    if x[i] <= lb + base_opts.eps {
                        can_decrease = false;
                    }
                }
                if let Some(ub) = bounds.upper[i] {
                    if x[i] >= ub - base_opts.eps {
                        can_increase = false;
                    }
                }

                // Project gradient component
                if (g[i] > 0.0 && !can_decrease) || (g[i] < 0.0 && !can_increase) {
                    p[i] = 0.0;
                }
            }

            // If no movement is possible, we're at a constrained optimum
            if p.mapv(|pi| pi.abs()).sum() < 1e-10 {
                return_array_to_pool(&mut memory_pool, p);
                break;
            }
        }

        // Line search
        let alpha_init = 1.0;
        let (alpha, f_new) = backtracking_line_search(
            &mut fun,
            &x.view(),
            f,
            &p.view(),
            &g.view(),
            alpha_init,
            0.0001,
            0.5,
            bounds,
        );

        nfev += 1;

        // Update position
        let s = get_array_from_pool(&mut memory_pool, n, |_| alpha * &p);
        let x_new = &x + &s;

        // Check step size convergence
        if array_norm_chunked(&s, options.chunk_size) < base_opts.xtol {
            return_array_to_pool(&mut memory_pool, s);
            return_array_to_pool(&mut memory_pool, p);
            x = x_new;
            break;
        }

        // Calculate new gradient using streaming approach
        let g_new = streaming_grad.compute(&mut fun, &x_new.view())?;
        nfev += n + 1; // evaluations used by the streaming gradient

        // Gradient difference
        let y = get_array_from_pool(&mut memory_pool, n, |_| &g_new - &g);

        // Check convergence on function value
        if check_convergence(
            f - f_new,
            0.0,
            g_new.mapv(|gi| gi.abs()).sum(),
            base_opts.ftol,
            0.0,
            base_opts.gtol,
        ) {
            return_array_to_pool(&mut memory_pool, s);
            return_array_to_pool(&mut memory_pool, y);
            return_array_to_pool(&mut memory_pool, p);
            x = x_new;
            g = g_new;
            break;
        }

        // Update L-BFGS history with memory management
        let s_dot_y = chunked_dot_product(&s, &y, options.chunk_size);
        if s_dot_y > 1e-10 {
            // Add to history
            s_history.push_back(s);
            y_history.push_back(y);

            // Remove oldest entries if history is too long
            while s_history.len() > options.max_history {
                if let Some(old_s) = s_history.pop_front() {
                    return_array_to_pool(&mut memory_pool, old_s);
                }
                if let Some(old_y) = y_history.pop_front() {
                    return_array_to_pool(&mut memory_pool, old_y);
                }
            }
        } else {
            // Don't update history, just return arrays to pool
            return_array_to_pool(&mut memory_pool, s);
            return_array_to_pool(&mut memory_pool, y);
        }

        return_array_to_pool(&mut memory_pool, p);

        // Update variables for next iteration
        x = x_new;
        f = f_new;
        g = g_new;
        iter += 1;
    }

    // Clean up remaining arrays in history
    while let Some(s) = s_history.pop_front() {
        return_array_to_pool(&mut memory_pool, s);
    }
    while let Some(y) = y_history.pop_front() {
        return_array_to_pool(&mut memory_pool, y);
    }

    // Final check for bounds
    if let Some(bounds) = bounds {
        bounds.project(x.as_slice_mut().unwrap());
    }

    // Use original function for final value
    let final_fun = fun(&x.view());

    Ok(OptimizeResult {
        x,
        fun: final_fun,
        iterations: iter,
        nit: iter,
        func_evals: nfev,
        nfev,
        success: iter < base_opts.max_iter,
        message: if iter < base_opts.max_iter {
            "Memory-efficient optimization terminated successfully.".to_string()
        } else {
            "Maximum iterations reached.".to_string()
        },
        jacobian: Some(g),
        hessian: None,
    })
}

/// Memory-efficient L-BFGS direction computation
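///
/// Implements the standard L-BFGS two-loop recursion (see Nocedal & Wright,
/// *Numerical Optimization*): walk the history from newest to oldest
/// accumulating `alpha_i = rho_i * s_i . q` while updating `q -= alpha_i * y_i`,
/// scale by `gamma = s_k . y_k / y_k . y_k` as the initial inverse-Hessian
/// approximation, then walk forwards applying `r += (alpha_i - beta_i) * s_i`,
/// and return `-r` as the search direction. Intermediate vectors are drawn from
/// and returned to the optional memory pool.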
fn compute_lbfgs_direction_memory_efficient(
    g: &Array1<f64>,
    s_history: &VecDeque<Array1<f64>>,
    y_history: &VecDeque<Array1<f64>>,
    memory_pool: &mut Option<MemoryPool>,
) -> Result<Array1<f64>, OptimizeError> {
    let m = s_history.len();
    if m == 0 {
        return Ok(-g);
    }

    let n = g.len();
    let mut q = get_array_from_pool(memory_pool, n, |_| g.clone());
    let mut alpha = vec![0.0; m];

    // First loop: backward through history
    for i in (0..m).rev() {
        let rho_i = 1.0 / y_history[i].dot(&s_history[i]);
        alpha[i] = rho_i * s_history[i].dot(&q);
        let temp = get_array_from_pool(memory_pool, n, |_| &q - alpha[i] * &y_history[i]);
        return_array_to_pool(memory_pool, q);
        q = temp;
    }

    // Apply initial Hessian approximation (simple scaling)
    let gamma = if m > 0 {
        s_history[m - 1].dot(&y_history[m - 1]) / y_history[m - 1].dot(&y_history[m - 1])
    } else {
        1.0
    };

    let mut r = get_array_from_pool(memory_pool, n, |_| gamma * &q);
    return_array_to_pool(memory_pool, q);

    // Second loop: forward through history
    for i in 0..m {
        let rho_i = 1.0 / y_history[i].dot(&s_history[i]);
        let beta = rho_i * y_history[i].dot(&r);
        let temp = get_array_from_pool(memory_pool, n, |_| &r + (alpha[i] - beta) * &s_history[i]);
        return_array_to_pool(memory_pool, r);
        r = temp;
    }

    // Return -r as the search direction
    let result = get_array_from_pool(memory_pool, n, |_| -&r);
    return_array_to_pool(memory_pool, r);
    Ok(result)
}

/// Get array from memory pool or create new one
fn get_array_from_pool<F>(
    memory_pool: &mut Option<MemoryPool>,
    size: usize,
    init_fn: F,
) -> Array1<f64>
where
    F: FnOnce(usize) -> Array1<f64>,
{
    match memory_pool {
        Some(pool) => {
            // `MemoryPool::get_array` already guarantees a buffer of the requested
            // size, so no extra length check is needed. Note that the buffer is
            // only cycled through the pool here; the result itself comes from
            // `init_fn`, which allocates. Writing `init_fn`'s output into the
            // pooled buffer would avoid that allocation, but requires changing
            // every call site.
            let array = pool.get_array(size);
            let result = init_fn(size);
            pool.return_array(array);
            result
        }
        None => init_fn(size),
    }
}

/// Return array to memory pool
fn return_array_to_pool(memory_pool: &mut Option<MemoryPool>, array: Array1<f64>) {
    if let Some(pool) = memory_pool {
        pool.return_array(array);
    }
    // If no pool, array will be dropped normally
}

/// Compute dot product in chunks to reduce memory usage
fn chunked_dot_product(a: &Array1<f64>, b: &Array1<f64>, chunk_size: usize) -> f64 {
    let n = a.len();
    let mut result = 0.0;

    for chunk_start in (0..n).step_by(chunk_size) {
        let chunk_end = std::cmp::min(chunk_start + chunk_size, n);
        let a_chunk = a.slice(ndarray::s![chunk_start..chunk_end]);
        let b_chunk = b.slice(ndarray::s![chunk_start..chunk_end]);
        result += a_chunk.dot(&b_chunk);
    }

    result
}

/// Compute array norm in chunks to reduce memory usage
fn array_norm_chunked(array: &Array1<f64>, chunk_size: usize) -> f64 {
    let n = array.len();
    let mut sum_sq = 0.0;

    for chunk_start in (0..n).step_by(chunk_size) {
        let chunk_end = std::cmp::min(chunk_start + chunk_size, n);
        let chunk = array.slice(ndarray::s![chunk_start..chunk_end]);
        sum_sq += chunk.dot(&chunk); // sum of squares without an intermediate buffer
    }

    sum_sq.sqrt()
}

/// Estimate memory usage for given problem size and history
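///
/// The estimate covers the current point and gradient (`2 * n`), the bounded
/// `(s, y)` history (`2 * max_history * n`), and four temporaries (`4 * n`),
/// all in `f64`. For example, `n = 1_000_000` with `max_history = 10` gives
/// `(2 + 20 + 4) * 1_000_000 * 8` bytes, i.e. about 208 MB.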
fn estimate_memory_usage(n: usize, max_history: usize) -> usize {
    // Size of f64 in bytes
    const F64_SIZE: usize = std::mem::size_of::<f64>();

    // Current point and gradient
    let current_vars = 2 * n * F64_SIZE;

    // L-BFGS history (s and y vectors)
    let history_size = 2 * max_history * n * F64_SIZE;

    // Temporary arrays for computation
    let temp_arrays = 4 * n * F64_SIZE;

    current_vars + history_size + temp_arrays
}

/// Create a memory-efficient optimizer with automatic parameter selection
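///
/// `max_history` and `chunk_size` are derived from the memory budget with the
/// heuristics below, and out-of-core storage is switched on for budgets under
/// 100 MB. An illustrative call:
///
/// ```ignore
/// // 10 million variables with a 512 MB budget.
/// let options = create_memory_efficient_optimizer(10_000_000, 512);
/// assert!(options.max_history >= 1);
/// assert!(options.chunk_size >= 64);
/// ```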
pub fn create_memory_efficient_optimizer(
    problem_size: usize,
    available_memory_mb: usize,
) -> MemoryOptions {
    let available_bytes = available_memory_mb * 1024 * 1024;

    // History length: at most 20 (s, y) pairs, and no more than would spend
    // roughly a quarter of the budget on pairs of 2 * problem_size doubles each.
    let max_history = std::cmp::min(
        20,
        available_bytes / (2 * problem_size * std::mem::size_of::<f64>() * 4),
    )
    .max(1);

    // Chunk size: up to the whole problem, but at least 64 elements and at most
    // roughly an eighth of the budget measured in doubles.
    let chunk_size = std::cmp::min(
        problem_size,
        std::cmp::max(64, available_bytes / (8 * std::mem::size_of::<f64>())),
    );

    MemoryOptions {
        base_options: Options::default(),
        max_memory_bytes: available_bytes,
        chunk_size,
        max_history,
        use_memory_pool: true,
        use_out_of_core: available_memory_mb < 100, // Use out-of-core for very limited memory
        temp_dir: None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    #[test]
    fn test_memory_efficient_lbfgs_quadratic() {
        let quadratic = |x: &ArrayView1<f64>| -> f64 {
            // Simple quadratic: f(x) = sum(x_i^2)
            x.mapv(|xi| xi.powi(2)).sum()
        };

        let n = 100; // Large problem
        let x0 = Array1::ones(n);
        let options = MemoryOptions {
            chunk_size: 32, // Small chunks
            max_history: 5, // Limited history
            ..MemoryOptions::default()
        };

        let result = minimize_memory_efficient_lbfgs(quadratic, x0, &options).unwrap();

        assert!(result.success);
        // Should converge to origin
        for i in 0..std::cmp::min(10, n) {
            assert_abs_diff_eq!(result.x[i], 0.0, epsilon = 1e-4);
        }
    }

    #[test]
    fn test_chunked_operations() {
        let a = Array1::from_vec((0..100).map(|i| i as f64).collect());
        let b = Array1::from_vec((0..100).map(|i| (i * 2) as f64).collect());

        // Test chunked dot product
        let dot_chunked = chunked_dot_product(&a, &b, 10);
        let dot_normal = a.dot(&b);
        assert_abs_diff_eq!(dot_chunked, dot_normal, epsilon = 1e-10);

        // Test chunked norm
        let norm_chunked = array_norm_chunked(&a, 10);
        let norm_normal = a.mapv(|x| x.powi(2)).sum().sqrt();
        assert_abs_diff_eq!(norm_chunked, norm_normal, epsilon = 1e-10);
    }

    #[test]
    fn test_memory_pool() {
        let mut pool = MemoryPool::new(3);

        // Get and return arrays
        let arr1 = pool.get_array(10);
        let arr2 = pool.get_array(10);

        pool.return_array(arr1);
        pool.return_array(arr2);

        // Should reuse arrays
        let arr3 = pool.get_array(10);
        let arr4 = pool.get_array(10);

        pool.return_array(arr3);
        pool.return_array(arr4);

        assert_eq!(pool.array_pool.len(), 2);
    }

    #[test]
    fn test_memory_estimation() {
        let n = 1000;
        let max_history = 10;
        let estimated = estimate_memory_usage(n, max_history);

        // Should be reasonable estimate (not zero, not too large)
        assert!(estimated > 0);
        assert!(estimated < 1_000_000); // Less than 1MB for this small problem
    }

    #[test]
    fn test_auto_parameter_selection() {
        let options = create_memory_efficient_optimizer(10000, 64); // 64MB available

        assert!(options.chunk_size > 0);
        assert!(options.max_history > 0);
        assert!(options.max_memory_bytes > 0);
    }
}