scirs2_optimize/streaming/streaming_trust_region.rs

//! Streaming Trust Region Methods
//!
//! This module implements trust region methods adapted for streaming data and
//! real-time optimization scenarios. The methods maintain approximate Hessian
//! information and adapt the trust region radius based on streaming performance.
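//!
//! A minimal usage sketch (the import paths below are assumptions and may need
//! adjusting to this crate's actual re-exports, so the block is not compiled as a
//! doctest):
//!
//! ```ignore
//! use ndarray::Array1;
//! use scirs2_optimize::streaming::{StreamingDataPoint, StreamingOptimizer};
//! use scirs2_optimize::streaming::streaming_trust_region::streaming_trust_region_linear_regression;
//!
//! // Build a 2-feature streaming linear-regression optimizer with unit trust radius.
//! let mut optimizer = streaming_trust_region_linear_regression(2, None, Some(1.0));
//!
//! // Feed one (features, target) observation and inspect the updated parameters.
//! let point = StreamingDataPoint::new(Array1::from(vec![1.0, 2.0]), 3.0);
//! optimizer.update(&point).unwrap();
//! println!("parameters = {:?}", optimizer.parameters());
//! ```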

use super::{
    utils, StreamingConfig, StreamingDataPoint, StreamingObjective, StreamingOptimizer,
    StreamingStats,
};
use crate::error::OptimizeError;
use ndarray::{Array1, Array2, ArrayView1};
use scirs2_linalg::solve;

type Result<T> = std::result::Result<T, OptimizeError>;

/// Streaming Trust Region optimizer
#[derive(Debug, Clone)]
pub struct StreamingTrustRegion<T: StreamingObjective> {
    /// Current parameter estimates
    parameters: Array1<f64>,
    /// Objective function
    objective: T,
    /// Configuration
    config: StreamingConfig,
    /// Statistics
    stats: StreamingStats,
    /// Current trust region radius
    trust_radius: f64,
    /// Approximate Hessian matrix (updated incrementally)
    hessian_approx: Array2<f64>,
    /// Gradient accumulator for improved estimates
    gradient_accumulator: Array1<f64>,
    /// Number of gradient accumulations
    gradient_count: usize,
    /// Success ratio for trust region adaptation
    success_ratio: f64,
    /// Previous function value for ratio computation
    prev_function_value: f64,
}

impl<T: StreamingObjective> StreamingTrustRegion<T> {
    /// Create a new streaming trust region optimizer
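    ///
    /// The Hessian approximation starts as the identity matrix and the gradient
    /// accumulator at zero; both are refined incrementally as data points stream in.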
    pub fn new(
        initial_parameters: Array1<f64>,
        objective: T,
        config: StreamingConfig,
        initial_trust_radius: f64,
    ) -> Self {
        let n_params = initial_parameters.len();
        Self {
            parameters: initial_parameters,
            objective,
            config,
            stats: StreamingStats::default(),
            trust_radius: initial_trust_radius,
            hessian_approx: Array2::eye(n_params), // Start with identity
            gradient_accumulator: Array1::zeros(n_params),
            gradient_count: 0,
            success_ratio: 0.5,
            prev_function_value: f64::INFINITY,
        }
    }

    /// Solve the trust region subproblem: take the (regularized) Newton step when it
    /// fits inside the trust region, scale it back to the boundary otherwise, and fall
    /// back to the Cauchy (steepest descent) point if the Newton system cannot be solved.
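    ///
    /// The subproblem minimizes the local quadratic model
    /// `m(p) = g^T p + 0.5 * p^T B p` subject to `||p|| <= trust_radius`,
    /// where `g` is the gradient estimate and `B` the incremental Hessian approximation.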
    fn solve_trust_region_subproblem(&self, gradient: &ArrayView1<f64>) -> Result<Array1<f64>> {
        let n = gradient.len();

        // For numerical stability, add regularization to Hessian
        let mut regularized_hessian = self.hessian_approx.clone();
        for i in 0..n {
            regularized_hessian[[i, i]] += self.config.regularization;
        }

        // Try to solve the Newton system first
        match solve(&regularized_hessian.view(), &(-gradient).view(), None) {
            Ok(newton_step) => {
                let step_norm = newton_step.mapv(|x| x * x).sum().sqrt();

                if step_norm <= self.trust_radius {
                    // Newton step is within trust region
                    Ok(newton_step)
                } else {
                    // Scale Newton step to trust region boundary
                    Ok(newton_step * (self.trust_radius / step_norm))
                }
            }
            Err(_) => {
                // Fall back to steepest descent (Cauchy point)
                let grad_norm = gradient.mapv(|x| x * x).sum().sqrt();
                if grad_norm > 0.0 {
                    let cauchy_step = -(self.trust_radius / grad_norm) * gradient;
                    Ok(cauchy_step.to_owned())
                } else {
                    Ok(Array1::zeros(n))
                }
            }
        }
    }

    /// Update the Hessian approximation using BFGS-like formula
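    ///
    /// A forgetting factor from the configuration is applied to the current
    /// approximation before the BFGS update
    /// `B <- B + (y y^T) / (y^T s) - (B s s^T B) / (s^T B s)`,
    /// so that stale curvature information decays as new data arrives; a
    /// Gershgorin-based regularization step afterwards keeps the matrix positive definite.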
    fn update_hessian_approximation(
        &mut self,
        step: &ArrayView1<f64>,
        grad_diff: &ArrayView1<f64>,
    ) {
        let rho = step.dot(grad_diff);

        if rho.abs() < 1e-12 {
            return; // Skip the update when y^T s is (near) zero; the BFGS formula would be unstable
        }

        // Apply forgetting factor to previous Hessian
        self.hessian_approx *= self.config.forgetting_factor;

        // BFGS update: H = H + (y*y^T)/(y^T*s) - (H*s*s^T*H)/(s^T*H*s)
        let n = step.len();
        let mut outer_yy = Array2::zeros((n, n));
        let mut hs = Array1::zeros(n);

        // Compute H*s
        for i in 0..n {
            for j in 0..n {
                hs[i] += self.hessian_approx[[i, j]] * step[j];
            }
        }

        let shs = step.dot(&hs);

        if shs > 1e-12 {
            // Compute outer products
            for i in 0..n {
                for j in 0..n {
                    outer_yy[[i, j]] = grad_diff[i] * grad_diff[j];
                    self.hessian_approx[[i, j]] += outer_yy[[i, j]] / rho - (hs[i] * hs[j]) / shs;
                }
            }
        }

        // Ensure positive definiteness by adding regularization if needed
        let min_eigenvalue = self.estimate_min_eigenvalue();
        if min_eigenvalue < self.config.regularization {
            for i in 0..n {
                self.hessian_approx[[i, i]] += self.config.regularization - min_eigenvalue;
            }
        }
    }

    /// Estimate minimum eigenvalue using Gershgorin circles
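    ///
    /// Gershgorin's circle theorem gives the cheap lower bound
    /// `lambda_min >= min_i (a_ii - sum_{j != i} |a_ij|)`,
    /// which is sufficient for the regularization check in the Hessian update.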
    fn estimate_min_eigenvalue(&self) -> f64 {
        let n = self.hessian_approx.nrows();
        let mut min_est = f64::INFINITY;

        for i in 0..n {
            let diagonal = self.hessian_approx[[i, i]];
            let off_diagonal_sum: f64 = (0..n)
                .filter(|&j| j != i)
                .map(|j| self.hessian_approx[[i, j]].abs())
                .sum();

            let lower_bound = diagonal - off_diagonal_sum;
            min_est = min_est.min(lower_bound);
        }

        min_est
    }

    /// Compute the trust region ratio for step acceptance
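    ///
    /// `ratio = actual_reduction / predicted_reduction`, where the predicted
    /// reduction `m(0) - m(step)` comes from the local quadratic model. Values
    /// near 1 indicate the model is trustworthy; small or negative values indicate it is not.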
    fn compute_trust_region_ratio(
        &self,
        step: &ArrayView1<f64>,
        gradient: &ArrayView1<f64>,
        actual_reduction: f64,
    ) -> f64 {
        // Predicted reduction: m(0) - m(step) = -g^T*step - 0.5*step^T*H*step
        let linear_term = -gradient.dot(step);

        let mut quadratic_term = 0.0;
        for i in 0..step.len() {
            for j in 0..step.len() {
                quadratic_term += step[i] * self.hessian_approx[[i, j]] * step[j];
            }
        }
        quadratic_term *= 0.5;

        let predicted_reduction = linear_term - quadratic_term;

        if predicted_reduction.abs() < 1e-12 {
            0.0
        } else {
            actual_reduction / predicted_reduction
        }
    }

    /// Update trust region radius based on success ratio
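    ///
    /// The schedule follows the classical rules: expand the radius by 2x when the
    /// ratio is at least 0.75 and the step reached at least 80% of the boundary,
    /// shrink it by 4x when the ratio drops below 0.25, and keep it unchanged
    /// otherwise, clamping to `[1e-12, 1e6]`. A smoothed success ratio is also
    /// maintained via exponential averaging.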
    fn update_trust_radius(&mut self, ratio: f64, step_norm: f64) {
        const VERY_SUCCESSFUL: f64 = 0.75;
        const SUCCESSFUL: f64 = 0.25;
        const EXPANSION_FACTOR: f64 = 2.0;
        const CONTRACTION_FACTOR: f64 = 0.25;
        const MAX_TRUST_RADIUS: f64 = 1e6;
        const MIN_TRUST_RADIUS: f64 = 1e-12;

        if ratio >= VERY_SUCCESSFUL && step_norm >= 0.8 * self.trust_radius {
            // Very successful step near boundary: expand trust region
            self.trust_radius = (self.trust_radius * EXPANSION_FACTOR).min(MAX_TRUST_RADIUS);
        } else if ratio < SUCCESSFUL {
            // Unsuccessful step: contract trust region
            self.trust_radius = (self.trust_radius * CONTRACTION_FACTOR).max(MIN_TRUST_RADIUS);
        }
        // Otherwise, keep trust radius unchanged

        // Update success ratio with exponential smoothing
        self.success_ratio = utils::ewma_update(self.success_ratio, ratio, 0.1);
    }
}

impl<T: StreamingObjective + Clone> StreamingOptimizer for StreamingTrustRegion<T> {
    fn update(&mut self, datapoint: &StreamingDataPoint) -> Result<()> {
        let start_time = std::time::Instant::now();

        // Evaluate current function value
        let current_f = self.objective.evaluate(&self.parameters.view(), datapoint);

        // Compute gradient
        let gradient = self.objective.gradient(&self.parameters.view(), datapoint);

        // Accumulate gradient for better estimates (exponential smoothing)
        if self.gradient_count == 0 {
            self.gradient_accumulator = gradient.clone();
        } else {
            // Running mean over the first ten points, then an EWMA with weight 0.1
            let alpha = 1.0 / (self.gradient_count as f64 + 1.0).min(10.0);
            self.gradient_accumulator =
                &((1.0 - alpha) * &self.gradient_accumulator) + &(alpha * &gradient);
        }
        self.gradient_count += 1;

        // Use accumulated gradient for more stable updates
        let effective_gradient = if self.gradient_count >= 3 {
            &self.gradient_accumulator
        } else {
            &gradient
        };

        // Solve trust region subproblem
        let step = self.solve_trust_region_subproblem(&effective_gradient.view())?;
        let step_norm = step.mapv(|x| x * x).sum().sqrt();

        // Evaluate the trial point
        let trial_parameters = &self.parameters + &step;
        let trial_f = self.objective.evaluate(&trial_parameters.view(), datapoint);

        // Compute actual reduction
        let actual_reduction = current_f - trial_f;

        // Compute trust region ratio
        let ratio = self.compute_trust_region_ratio(
            &step.view(),
            &effective_gradient.view(),
            actual_reduction,
        );

        // Accept or reject step
        const ACCEPTANCE_THRESHOLD: f64 = 0.1;
        if ratio >= ACCEPTANCE_THRESHOLD {
            // Accept step
            let old_parameters = self.parameters.clone();
            self.parameters = trial_parameters;

            // Update the Hessian approximation once at least one step has been taken,
            // using the deviation of the current gradient from the smoothed gradient
            // as a proxy for the gradient difference y
            if self.stats.updates_performed > 0 {
                let grad_diff = &gradient - &self.gradient_accumulator;
                self.update_hessian_approximation(&step.view(), &grad_diff.view());
            }

            // Check convergence
            self.stats.converged = utils::check_convergence(
                &old_parameters.view(),
                &self.parameters.view(),
                self.config.tolerance,
            );

            self.stats.updates_performed += 1;
            self.prev_function_value = trial_f;
        } else {
            // Reject step: parameters stay unchanged; only the trust radius (below) adapts
        }

        // Update trust region radius
        self.update_trust_radius(ratio, step_norm);

        // Update statistics
        self.stats.points_processed += 1;
        self.stats.current_loss = if ratio >= ACCEPTANCE_THRESHOLD {
            trial_f
        } else {
            current_f
        };
        self.stats.average_loss =
            utils::ewma_update(self.stats.average_loss, self.stats.current_loss, 0.01);

        self.stats.processing_time_ms += start_time.elapsed().as_secs_f64() * 1000.0;

        Ok(())
    }

    fn parameters(&self) -> &Array1<f64> {
        &self.parameters
    }

    fn stats(&self) -> &StreamingStats {
        &self.stats
    }

    fn reset(&mut self) {
        let n = self.parameters.len();
        self.hessian_approx = Array2::eye(n);
        self.gradient_accumulator = Array1::zeros(n);
        self.gradient_count = 0;
        self.success_ratio = 0.5;
        self.prev_function_value = f64::INFINITY;
        self.stats = StreamingStats::default();
    }
}

/// Convenience function for streaming trust region with linear regression
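///
/// Parameters start at zero; `config` falls back to `StreamingConfig::default()`
/// and the initial trust radius to `1.0` when not supplied.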
#[allow(dead_code)]
pub fn streaming_trust_region_linear_regression(
    n_features: usize,
    config: Option<StreamingConfig>,
    initial_trust_radius: Option<f64>,
) -> StreamingTrustRegion<super::LinearRegressionObjective> {
    let config = config.unwrap_or_default();
    let trust_radius = initial_trust_radius.unwrap_or(1.0);
    let initial_params = Array1::zeros(n_features);
    let objective = super::LinearRegressionObjective;

    StreamingTrustRegion::new(initial_params, objective, config, trust_radius)
}

/// Convenience function for streaming trust region with logistic regression
#[allow(dead_code)]
pub fn streaming_trust_region_logistic_regression(
    n_features: usize,
    config: Option<StreamingConfig>,
    initial_trust_radius: Option<f64>,
) -> StreamingTrustRegion<super::LogisticRegressionObjective> {
    let config = config.unwrap_or_default();
    let trust_radius = initial_trust_radius.unwrap_or(1.0);
    let initial_params = Array1::zeros(n_features);
    let objective = super::LogisticRegressionObjective;

    StreamingTrustRegion::new(initial_params, objective, config, trust_radius)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::streaming::{LinearRegressionObjective, StreamingDataPoint};

    #[test]
    fn test_streaming_trust_region_creation() {
        let params = Array1::from(vec![0.0, 0.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();
        let trust_radius = 1.0;

        let optimizer = StreamingTrustRegion::new(params.clone(), objective, config, trust_radius);
        assert_eq!(optimizer.parameters(), &params);
        assert_eq!(optimizer.trust_radius, 1.0);
    }

    #[test]
    fn test_trust_region_subproblem_solving() {
        let params = Array1::from(vec![0.0, 0.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();
        let trust_radius = 1.0;

        let optimizer = StreamingTrustRegion::new(params, objective, config, trust_radius);
        let gradient = Array1::from(vec![1.0, 2.0]);

        let step = optimizer
            .solve_trust_region_subproblem(&gradient.view())
            .unwrap();
        let step_norm = step.mapv(|x| x * x).sum().sqrt();

        // Step should be within trust region
        assert!(step_norm <= trust_radius + 1e-10);
    }

    #[test]
    fn test_streaming_trust_region_update() {
        let mut optimizer = streaming_trust_region_linear_regression(2, None, Some(1.0));

        let features = Array1::from(vec![1.0, 2.0]);
        let target = 3.0;
        let point = StreamingDataPoint::new(features, target);

        assert!(optimizer.update(&point).is_ok());
        assert_eq!(optimizer.stats().points_processed, 1);
    }

    #[test]
    fn test_hessian_update() {
        let params = Array1::from(vec![1.0, 1.0]);
        let objective = LinearRegressionObjective;
        let mut config = StreamingConfig::default();
        config.regularization = 1e-6;

        let mut optimizer = StreamingTrustRegion::new(params, objective, config, 1.0);

        let step = Array1::from(vec![0.1, 0.2]);
        let grad_diff = Array1::from(vec![0.05, 0.1]);

        let original_hessian = optimizer.hessian_approx.clone();
        optimizer.update_hessian_approximation(&step.view(), &grad_diff.view());

        // Hessian should have changed
        assert_ne!(optimizer.hessian_approx, original_hessian);
    }

    #[test]
    fn test_trust_radius_adaptation() {
        let params = Array1::from(vec![0.0, 0.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();
        let initial_radius = 1.0;

        let mut optimizer = StreamingTrustRegion::new(params, objective, config, initial_radius);

        // Test expansion with very successful step
        optimizer.update_trust_radius(0.9, 0.9); // High ratio, near boundary
        assert!(optimizer.trust_radius > initial_radius);

        // Test contraction with unsuccessful step
        optimizer.update_trust_radius(0.1, 0.5); // Low ratio
        assert!(optimizer.trust_radius < initial_radius);
    }

    #[test]
    fn test_convergence_detection() {
        let mut config = StreamingConfig::default();
        config.tolerance = 1e-2;
        config.learning_rate = 0.5;

        let mut optimizer = streaming_trust_region_linear_regression(2, Some(config), Some(1.0));

        // Use simple data that should converge quickly
        let point = StreamingDataPoint::new(Array1::from(vec![0.0, 0.0]), 0.0);

        // Should converge quickly with zero gradient
        for _ in 0..10 {
            optimizer.update(&point).unwrap();
            if optimizer.converged() {
                break;
            }
        }

        // Should detect convergence when parameters don't change much
        assert!(optimizer.converged() || optimizer.stats().updates_performed < 10);
    }
}