scirs2_optimize/unconstrained/adaptive_convergence.rs

//! Adaptive tolerance selection and convergence criteria
//!
//! This module provides adaptive tolerance selection mechanisms that automatically
//! adjust convergence criteria based on problem characteristics, function scale,
//! optimization progress, and numerical stability considerations.
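//!
//! A minimal usage sketch (the `use` path is an assumption about the crate's
//! public module layout; adjust it as needed):
//!
//! ```ignore
//! use scirs2_optimize::unconstrained::adaptive_convergence::{
//!     AdaptiveToleranceOptions, AdaptiveToleranceState,
//! };
//!
//! let mut state = AdaptiveToleranceState::new(AdaptiveToleranceOptions::default(), 10);
//! for iteration in 0..100 {
//!     // f_val, grad_norm, and step_norm would come from the optimizer;
//!     // the constants here are placeholders.
//!     let (f_val, grad_norm, step_norm) = (1.0, 1e-6, 1e-9);
//!     state.update(f_val, grad_norm, step_norm, iteration).unwrap();
//!     if state.check_convergence(1e-12, step_norm, grad_norm).overall_converged {
//!         break;
//!     }
//! }
//! ```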

use crate::error::OptimizeError;
use crate::unconstrained::Options;
use std::collections::VecDeque;

/// Adaptive tolerance options
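///
/// The defaults (see the `Default` impl below) are a reasonable starting point;
/// individual fields can be overridden with struct update syntax, as in this
/// sketch:
///
/// ```ignore
/// let opts = AdaptiveToleranceOptions {
///     adaptation_rate: 0.8,
///     history_length: 20,
///     ..Default::default()
/// };
/// ```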
#[derive(Debug, Clone)]
pub struct AdaptiveToleranceOptions {
    /// Initial tolerance for function value convergence
    pub initial_ftol: f64,
    /// Initial tolerance for gradient norm convergence
    pub initial_gtol: f64,
    /// Initial tolerance for step size convergence
    pub initial_xtol: f64,
    /// Minimum allowed tolerance (floor)
    pub min_tolerance: f64,
    /// Maximum allowed tolerance (ceiling)
    pub max_tolerance: f64,
    /// How aggressively to adapt tolerances (0.0 = no adaptation, 1.0 = maximum)
    pub adaptation_rate: f64,
    /// Number of iterations to consider for adaptation
    pub history_length: usize,
    /// Whether to consider function scale in adaptation
    pub use_function_scale: bool,
    /// Whether to consider gradient scale in adaptation
    pub use_gradient_scale: bool,
    /// Whether to adapt based on stagnation detection
    pub use_stagnation_detection: bool,
    /// Whether to use problem conditioning in adaptation
    pub use_problem_conditioning: bool,
}

impl Default for AdaptiveToleranceOptions {
    fn default() -> Self {
        Self {
            initial_ftol: 1e-8,
            initial_gtol: 1e-5,
            initial_xtol: 1e-8,
            min_tolerance: 1e-15,
            max_tolerance: 1e-3,
            adaptation_rate: 0.5,
            history_length: 10,
            use_function_scale: true,
            use_gradient_scale: true,
            use_stagnation_detection: true,
            use_problem_conditioning: false, // Requires more analysis
        }
    }
}

/// State tracking for adaptive tolerance selection
#[derive(Debug, Clone)]
pub struct AdaptiveToleranceState {
    /// Current function tolerance
    pub current_ftol: f64,
    /// Current gradient tolerance
    pub current_gtol: f64,
    /// Current step tolerance
    pub current_xtol: f64,
    /// History of function values
    function_history: VecDeque<f64>,
    /// History of gradient norms
    gradient_history: VecDeque<f64>,
    /// History of step norms
    step_history: VecDeque<f64>,
    /// Estimated function scale
    function_scale: f64,
    /// Estimated gradient scale
    gradient_scale: f64,
    /// Number of stagnant iterations
    stagnant_nit: usize,
    /// Problem dimension
    problem_dim: usize,
    /// Options
    options: AdaptiveToleranceOptions,
}

impl AdaptiveToleranceState {
    /// Create new adaptive tolerance state
    pub fn new(options: AdaptiveToleranceOptions, problem_dim: usize) -> Self {
        Self {
            current_ftol: options.initial_ftol,
            current_gtol: options.initial_gtol,
            current_xtol: options.initial_xtol,
            function_history: VecDeque::with_capacity(options.history_length),
            gradient_history: VecDeque::with_capacity(options.history_length),
            step_history: VecDeque::with_capacity(options.history_length),
            function_scale: 1.0,
            gradient_scale: 1.0,
            stagnant_nit: 0,
            problem_dim,
            options,
        }
    }

    /// Update state with new iteration data and adapt tolerances
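    ///
    /// A minimal per-iteration sketch (the argument variables are placeholders
    /// for values produced by the surrounding solver loop):
    ///
    /// ```ignore
    /// state.update(function_value, gradient_norm, step_norm, iteration)?;
    /// ```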
    pub fn update(
        &mut self,
        function_value: f64,
        gradient_norm: f64,
        step_norm: f64,
        iteration: usize,
    ) -> Result<(), OptimizeError> {
        // Add to history
        self.add_to_history(function_value, gradient_norm, step_norm);

        // Update scales
        if self.options.use_function_scale {
            self.update_function_scale();
        }
        if self.options.use_gradient_scale {
            self.update_gradient_scale();
        }

        // Detect stagnation
        if self.options.use_stagnation_detection {
            self.update_stagnation_detection();
        }

        // Adapt tolerances based on accumulated information
        self.adapt_tolerances(iteration)?;

        Ok(())
    }

    /// Add new values to history
    fn add_to_history(&mut self, function_value: f64, gradient_norm: f64, step_norm: f64) {
        // Add to function history
        if self.function_history.len() >= self.options.history_length {
            self.function_history.pop_front();
        }
        self.function_history.push_back(function_value);

        // Add to gradient history
        if self.gradient_history.len() >= self.options.history_length {
            self.gradient_history.pop_front();
        }
        self.gradient_history.push_back(gradient_norm);

        // Add to step history
        if self.step_history.len() >= self.options.history_length {
            self.step_history.pop_front();
        }
        self.step_history.push_back(step_norm);
    }

    /// Update function scale estimate
    fn update_function_scale(&mut self) {
        if self.function_history.len() < 2 {
            return;
        }

        // Estimate function scale as a robust measure of typical function values
        let mut values: Vec<f64> = self.function_history.iter().map(|&x| x.abs()).collect();
        // total_cmp is NaN-safe; partial_cmp().unwrap() would panic on NaN
        values.sort_by(|a, b| a.total_cmp(b));

        // Use median as robust scale estimate
        let median_idx = values.len() / 2;
        let scale = if values.len() % 2 == 0 {
            0.5 * (values[median_idx - 1] + values[median_idx])
        } else {
            values[median_idx]
        };

        // Smooth the scale update
        let alpha = 0.1; // Smoothing factor
        self.function_scale = alpha * scale.max(1e-15) + (1.0 - alpha) * self.function_scale;
    }

    /// Update gradient scale estimate
    fn update_gradient_scale(&mut self) {
        if self.gradient_history.len() < 2 {
            return;
        }

        // Estimate gradient scale as a robust measure of typical gradient norms
        let mut values: Vec<f64> = self.gradient_history.iter().cloned().collect();
        values.sort_by(|a, b| a.total_cmp(b));

        // Use median as robust scale estimate
        let median_idx = values.len() / 2;
        let scale = if values.len() % 2 == 0 {
            0.5 * (values[median_idx - 1] + values[median_idx])
        } else {
            values[median_idx]
        };

        // Smooth the scale update
        let alpha = 0.1;
        self.gradient_scale = alpha * scale.max(1e-15) + (1.0 - alpha) * self.gradient_scale;
    }

    /// Update stagnation detection
    fn update_stagnation_detection(&mut self) {
        if self.function_history.len() < 3 {
            self.stagnant_nit = 0;
            return;
        }

        // Check if function values are not decreasing significantly
        let recent_len = std::cmp::min(3, self.function_history.len());
        let recent_values: Vec<f64> = self
            .function_history
            .iter()
            .rev()
            .take(recent_len)
            .cloned()
            .collect();

        let relative_change = if recent_values.len() >= 2 {
            let f_new = recent_values[0];
            let f_old = recent_values[recent_values.len() - 1];
            if f_old.abs() > 1e-15 {
                (f_old - f_new).abs() / f_old.abs()
            } else {
                (f_old - f_new).abs()
            }
        } else {
            1.0
        };

        // Consider stagnant if relative change is very small
        if relative_change < 1e-10 {
            self.stagnant_nit += 1;
        } else {
            self.stagnant_nit = 0;
        }
    }

    /// Adapt tolerances based on collected information
    fn adapt_tolerances(&mut self, iteration: usize) -> Result<(), OptimizeError> {
        if iteration < 5 || self.options.adaptation_rate <= 0.0 {
            return Ok(()); // Don't adapt too early or if adaptation is disabled
        }

        // Function tolerance adaptation
        if self.options.use_function_scale {
            let scale_factor = self.function_scale;
            let adaptive_ftol = self.options.initial_ftol * scale_factor;

            self.current_ftol = self.blend_tolerance(
                self.current_ftol,
                adaptive_ftol,
                self.options.adaptation_rate * 0.5, // More conservative for ftol
            );
        }

        // Gradient tolerance adaptation
        if self.options.use_gradient_scale {
            let scale_factor = self.gradient_scale;
            let dimension_factor = (self.problem_dim as f64).sqrt(); // Account for dimension
            let adaptive_gtol = self.options.initial_gtol * scale_factor / dimension_factor;

            self.current_gtol = self.blend_tolerance(
                self.current_gtol,
                adaptive_gtol,
                self.options.adaptation_rate,
            );
        }

        // Step tolerance adaptation
        let adaptive_xtol = if self.step_history.len() >= 3 {
            // Adapt based on typical step sizes
            let typical_step =
                self.step_history.iter().sum::<f64>() / self.step_history.len() as f64;
            self.options.initial_xtol * typical_step.max(1e-12)
        } else {
            self.options.initial_xtol
        };

        self.current_xtol = self.blend_tolerance(
            self.current_xtol,
            adaptive_xtol,
            self.options.adaptation_rate * 0.3, // More conservative for xtol
        );

        // Stagnation-based adaptation
        if self.options.use_stagnation_detection && self.stagnant_nit > 5 {
            // Relax tolerances if we're stagnating
            let relaxation_factor = 1.0 + 0.1 * (self.stagnant_nit as f64 - 5.0).min(10.0);
            self.current_ftol *= relaxation_factor;
            self.current_gtol *= relaxation_factor;
            self.current_xtol *= relaxation_factor;
        }

        // Apply bounds
        self.current_ftol = self
            .current_ftol
            .clamp(self.options.min_tolerance, self.options.max_tolerance);
        self.current_gtol = self
            .current_gtol
            .clamp(self.options.min_tolerance, self.options.max_tolerance);
        self.current_xtol = self
            .current_xtol
            .clamp(self.options.min_tolerance, self.options.max_tolerance);

        Ok(())
    }

    /// Blend current tolerance with adaptive tolerance
    fn blend_tolerance(&self, current: f64, adaptive: f64, rate: f64) -> f64 {
        rate * adaptive + (1.0 - rate) * current
    }

    /// Get current Options struct with adapted tolerances
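    ///
    /// A sketch of feeding the adapted tolerances back into a solver step
    /// (`base_options` and `run_solver_step` are hypothetical names):
    ///
    /// ```ignore
    /// let adapted = state.get_current_options(&base_options);
    /// run_solver_step(&problem, &adapted); // hypothetical solver call
    /// ```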
    pub fn get_current_options(&self, base_options: &Options) -> Options {
        let mut options = base_options.clone();
        options.ftol = self.current_ftol;
        options.gtol = self.current_gtol;
        options.xtol = self.current_xtol;
        options
    }

    /// Check if convergence is achieved with current tolerances
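    ///
    /// Convergence is declared when any one of the three criteria is met. A
    /// minimal sketch, assuming `state` holds an up-to-date
    /// `AdaptiveToleranceState`:
    ///
    /// ```ignore
    /// let status = state.check_convergence(f_change, step_norm, grad_norm);
    /// if status.overall_converged {
    ///     println!("converged with ftol = {:.1e}", status.ftol_used);
    /// }
    /// ```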
    pub fn check_convergence(
        &self,
        function_change: f64,
        step_norm: f64,
        gradient_norm: f64,
    ) -> ConvergenceStatus {
        let f_converged = function_change.abs() < self.current_ftol;
        let x_converged = step_norm < self.current_xtol;
        let g_converged = gradient_norm < self.current_gtol;

        ConvergenceStatus {
            f_converged,
            x_converged,
            g_converged,
            overall_converged: f_converged || x_converged || g_converged,
            ftol_used: self.current_ftol,
            xtol_used: self.current_xtol,
            gtol_used: self.current_gtol,
        }
    }

    /// Get adaptation statistics for debugging
    pub fn get_adaptation_stats(&self) -> AdaptationStats {
        AdaptationStats {
            function_scale: self.function_scale,
            gradient_scale: self.gradient_scale,
            stagnant_nit: self.stagnant_nit,
            current_ftol: self.current_ftol,
            current_gtol: self.current_gtol,
            current_xtol: self.current_xtol,
            history_length: self.function_history.len(),
        }
    }
}

/// Convergence status with detailed information
#[derive(Debug, Clone)]
pub struct ConvergenceStatus {
    pub f_converged: bool,
    pub x_converged: bool,
    pub g_converged: bool,
    pub overall_converged: bool,
    pub ftol_used: f64,
    pub xtol_used: f64,
    pub gtol_used: f64,
}

/// Adaptation statistics for debugging and monitoring
#[derive(Debug, Clone)]
pub struct AdaptationStats {
    pub function_scale: f64,
    pub gradient_scale: f64,
    pub stagnant_nit: usize,
    pub current_ftol: f64,
    pub current_gtol: f64,
    pub current_xtol: f64,
    pub history_length: usize,
}

/// Create adaptive tolerance options optimized for specific problem types
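///
/// Recognized `problem_type` values are `"well_conditioned"`, `"ill_conditioned"`,
/// `"noisy"`, and `"high_dimensional"` (matched case-insensitively); anything else
/// falls back to the defaults. A small sketch:
///
/// ```ignore
/// let opts = create_adaptive_options_for_problem("noisy", 50);
/// assert_eq!(opts.history_length, 20); // noisy problems keep a longer history
/// ```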
#[allow(dead_code)]
pub fn create_adaptive_options_for_problem(
    problem_type: &str,
    problem_size: usize,
) -> AdaptiveToleranceOptions {
    match problem_type.to_lowercase().as_str() {
        "well_conditioned" => AdaptiveToleranceOptions {
            initial_ftol: 1e-10,
            initial_gtol: 1e-7,
            initial_xtol: 1e-10,
            adaptation_rate: 0.3,
            use_function_scale: true,
            use_gradient_scale: true,
            use_stagnation_detection: false, // Well-conditioned problems don't need stagnation handling
            ..Default::default()
        },
        "ill_conditioned" => AdaptiveToleranceOptions {
            initial_ftol: 1e-6,
            initial_gtol: 1e-4,
            initial_xtol: 1e-6,
            adaptation_rate: 0.7,
            use_function_scale: true,
            use_gradient_scale: true,
            use_stagnation_detection: true,
            use_problem_conditioning: true,
            ..Default::default()
        },
        "noisy" => AdaptiveToleranceOptions {
            initial_ftol: 1e-5,
            initial_gtol: 1e-3,
            initial_xtol: 1e-5,
            adaptation_rate: 0.8,
            history_length: 20, // Longer history for noisy problems
            use_stagnation_detection: true,
            ..Default::default()
        },
        "high_dimensional" => {
            let dimension_factor = (problem_size as f64).sqrt();
            AdaptiveToleranceOptions {
                initial_ftol: 1e-8,
                initial_gtol: 1e-5 * dimension_factor,
                initial_xtol: 1e-8,
                adaptation_rate: 0.6,
                use_gradient_scale: true,
                ..Default::default()
            }
        }
        _ => AdaptiveToleranceOptions::default(),
    }
}

/// Enhanced convergence check with adaptive tolerances
#[allow(dead_code)]
pub fn check_convergence_adaptive(
    function_change: f64,
    step_norm: f64,
    gradient_norm: f64,
    adaptive_state: &AdaptiveToleranceState,
) -> ConvergenceStatus {
    adaptive_state.check_convergence(function_change, step_norm, gradient_norm)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_adaptive_tolerance_initialization() {
        let options = AdaptiveToleranceOptions::default();
        let state = AdaptiveToleranceState::new(options.clone(), 10);

        assert_eq!(state.current_ftol, options.initial_ftol);
        assert_eq!(state.current_gtol, options.initial_gtol);
        assert_eq!(state.current_xtol, options.initial_xtol);
        assert_eq!(state.problem_dim, 10);
    }

    #[test]
    fn test_scale_adaptation() {
        let options = AdaptiveToleranceOptions {
            use_function_scale: true,
            adaptation_rate: 1.0, // Full adaptation for testing
            ..Default::default()
        };
        let mut state = AdaptiveToleranceState::new(options, 5);

        // Add some function values with known scale
        for i in 0..10 {
            let f_val = 100.0 + i as f64; // Scale around 100
            state.update(f_val, 1e-5, 1e-8, i).unwrap();
        }

        // Function scale should adapt to be around 100
        let stats = state.get_adaptation_stats();
        assert!(stats.function_scale > 50.0 && stats.function_scale < 200.0);
    }

    #[test]
    fn test_stagnation_detection() {
        let options = AdaptiveToleranceOptions {
            use_stagnation_detection: true,
            ..Default::default()
        };
        let mut state = AdaptiveToleranceState::new(options, 5);

        // Simulate stagnation by repeating the same function value
        for i in 0..10 {
            state.update(1.0, 1e-5, 1e-8, i).unwrap();
        }

        let stats = state.get_adaptation_stats();
        assert!(stats.stagnant_nit > 5);
    }

    #[test]
    fn test_convergence_check() {
        let options = AdaptiveToleranceOptions::default();
        let state = AdaptiveToleranceState::new(options, 5);

        let status = state.check_convergence(1e-10, 1e-10, 1e-10);
        assert!(status.overall_converged);
        assert!(status.f_converged);
        assert!(status.x_converged);
        assert!(status.g_converged);
    }

    #[test]
    fn test_problem_specific_options() {
        let well_cond = create_adaptive_options_for_problem("well_conditioned", 10);
        assert!(well_cond.initial_ftol < 1e-8);
        assert!(!well_cond.use_stagnation_detection);

        let ill_cond = create_adaptive_options_for_problem("ill_conditioned", 10);
        assert!(ill_cond.adaptation_rate > 0.5);
        assert!(ill_cond.use_stagnation_detection);

        let high_dim = create_adaptive_options_for_problem("high_dimensional", 1000);
        assert!(high_dim.initial_gtol > 1e-5); // Should be relaxed for high dimensions
    }

    #[test]
    fn test_tolerance_bounds() {
        let options = AdaptiveToleranceOptions {
            min_tolerance: 1e-12,
            max_tolerance: 1e-4,
            adaptation_rate: 1.0, // Full adaptation
            ..Default::default()
        };
        let mut state = AdaptiveToleranceState::new(options, 5);

        // Force extreme adaptation by using very large values
        for i in 0..10 {
            state.update(1e10, 1e10, 1e10, i).unwrap();
        }

        // Tolerances should be clamped within bounds
        assert!(state.current_ftol >= 1e-12);
        assert!(state.current_ftol <= 1e-4);
        assert!(state.current_gtol >= 1e-12);
        assert!(state.current_gtol <= 1e-4);
    }
}