scirs2_optimize/streaming/incremental_newton.rs

//! Incremental Newton Methods for Streaming Optimization
//!
//! This module implements incremental Newton and quasi-Newton methods that
//! maintain second-order information efficiently for streaming data scenarios.
//! These methods are particularly useful when Hessian information is available
//! or can be approximated incrementally.
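//!
//! # Example
//!
//! A minimal usage sketch, assuming the crate is consumed as `scirs2_optimize`
//! and that the parent `streaming` module re-exports `StreamingDataPoint`,
//! `StreamingOptimizer`, and `LinearRegressionObjective` as used in the tests below:
//!
//! ```ignore
//! use ndarray::Array1;
//! use scirs2_optimize::streaming::{StreamingDataPoint, StreamingOptimizer};
//! use scirs2_optimize::streaming::incremental_newton::incremental_lbfgs_linear_regression;
//!
//! // Two-feature streaming linear regression with an L-BFGS memory of 10 pairs.
//! let mut optimizer = incremental_lbfgs_linear_regression(2, Some(10), None);
//!
//! // Feed one observation (features, target) and inspect the updated parameters.
//! let point = StreamingDataPoint::new(Array1::from(vec![1.0, 2.0]), 3.0);
//! optimizer.update(&point).unwrap();
//! println!("parameters: {:?}", optimizer.parameters());
//! ```
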
use super::{
    utils, StreamingConfig, StreamingDataPoint, StreamingObjective, StreamingOptimizer,
    StreamingStats,
};
use crate::error::OptimizeError;
use ndarray::{Array1, Array2, ArrayView1};
use scirs2_linalg::solve;

type Result<T> = std::result::Result<T, OptimizeError>;

/// Types of incremental Newton methods
#[derive(Debug, Clone, Copy)]
pub enum IncrementalNewtonMethod {
    /// Full Newton with exact Hessian (when available)
    Exact,
    /// BFGS quasi-Newton approximation
    BFGS,
    /// L-BFGS with limited memory
    LBFGS(usize), // memory size
    /// Sherman-Morrison rank-1 updates
    ShermanMorrison,
}

/// Incremental Newton optimizer
#[derive(Debug, Clone)]
pub struct IncrementalNewton<T: StreamingObjective> {
    /// Current parameter estimates
    parameters: Array1<f64>,
    /// Objective function
    objective: T,
    /// Configuration
    config: StreamingConfig,
    /// Statistics
    stats: StreamingStats,
    /// Method type
    method: IncrementalNewtonMethod,
    /// Current inverse Hessian approximation
    hessian_inv: Array2<f64>,
    /// Previous gradient for quasi-Newton updates
    prev_gradient: Option<Array1<f64>>,
    /// L-BFGS history storage
    lbfgs_history: LBFGSHistory,
    /// Regularization for numerical stability
    regularization: f64,
    /// Line search step size
    step_size: f64,
}

/// Storage for L-BFGS history
#[derive(Debug, Clone)]
struct LBFGSHistory {
    /// Storage for s vectors (parameter differences)
    s_vectors: Vec<Array1<f64>>,
    /// Storage for y vectors (gradient differences)
    y_vectors: Vec<Array1<f64>>,
    /// Storage for rho values (1 / (y^T s))
    rho_values: Vec<f64>,
    /// Maximum memory size
    memory_size: usize,
    /// Current position in circular buffer
    current_pos: usize,
    /// Number of stored vectors
    stored_count: usize,
}

impl LBFGSHistory {
    fn new(memory_size: usize) -> Self {
        Self {
            s_vectors: Vec::with_capacity(memory_size),
            y_vectors: Vec::with_capacity(memory_size),
            rho_values: Vec::with_capacity(memory_size),
            memory_size,
            current_pos: 0,
            stored_count: 0,
        }
    }

    fn add_pair(&mut self, s: Array1<f64>, y: Array1<f64>) {
        let sy = s.dot(&y);
        if sy <= 1e-12 {
            // Skip pairs that violate the curvature condition; otherwise rho
            // would be infinite or negative and corrupt the recursion.
            return;
        }
        let rho = 1.0 / sy;

        if self.stored_count < self.memory_size {
            self.s_vectors.push(s);
            self.y_vectors.push(y);
            self.rho_values.push(rho);
            self.stored_count += 1;
        } else {
            self.s_vectors[self.current_pos] = s;
            self.y_vectors[self.current_pos] = y;
            self.rho_values[self.current_pos] = rho;
        }

        self.current_pos = (self.current_pos + 1) % self.memory_size;
    }

    fn compute_direction(&self, gradient: &ArrayView1<f64>) -> Array1<f64> {
        if self.stored_count == 0 {
            return -gradient.to_owned();
        }

        let mut q = gradient.to_owned();
        let mut alpha = vec![0.0; self.stored_count];

        // First loop (backward): walk the stored pairs from newest to oldest
        for i in 0..self.stored_count {
            let idx = (self.current_pos + self.memory_size - 1 - i) % self.memory_size;
            alpha[i] = self.rho_values[idx] * self.s_vectors[idx].dot(&q);
            q = &q - &(alpha[i] * &self.y_vectors[idx]);
        }

        // Initial Hessian approximation: identity scaled by the most recent curvature
        let newest = (self.current_pos + self.memory_size - 1) % self.memory_size;
        let gamma = self.s_vectors[newest].dot(&self.y_vectors[newest])
            / self.y_vectors[newest].dot(&self.y_vectors[newest]);
        if gamma > 0.0 && gamma.is_finite() {
            q = q * gamma;
        }

        // Second loop (forward): walk the same pairs from oldest back to newest
        for i in (0..self.stored_count).rev() {
            let idx = (self.current_pos + self.memory_size - 1 - i) % self.memory_size;
            let beta = self.rho_values[idx] * self.y_vectors[idx].dot(&q);
            q = &q + &((alpha[i] - beta) * &self.s_vectors[idx]);
        }

        -q
    }

    fn reset(&mut self) {
        self.s_vectors.clear();
        self.y_vectors.clear();
        self.rho_values.clear();
        self.current_pos = 0;
        self.stored_count = 0;
    }
}

impl<T: StreamingObjective> IncrementalNewton<T> {
    /// Create a new incremental Newton optimizer
    pub fn new(
        initial_parameters: Array1<f64>,
        objective: T,
        config: StreamingConfig,
        method: IncrementalNewtonMethod,
    ) -> Self {
        let n_params = initial_parameters.len();
        let memory_size = match method {
            IncrementalNewtonMethod::LBFGS(m) => m,
            _ => 10, // Default memory size for other methods
        };

        Self {
            parameters: initial_parameters,
            objective,
            config,
            stats: StreamingStats::default(),
            method,
            hessian_inv: Array2::eye(n_params),
            prev_gradient: None,
            lbfgs_history: LBFGSHistory::new(memory_size),
            regularization: 1e-8,
            step_size: 1.0,
        }
    }

    /// Update inverse Hessian using BFGS formula
    fn update_bfgs(&mut self, s: &ArrayView1<f64>, y: &ArrayView1<f64>) -> Result<()> {
        let n = s.len();
        let sy = s.dot(y);

        if sy <= 1e-12 {
            return Ok(()); // Skip update if curvature condition not satisfied
        }

        // BFGS update: H^{-1} <- (I - ρsy^T) H^{-1} (I - ρys^T) + ρss^T, with ρ = 1/(s^T y)
        let rho = 1.0 / sy;

        // Compute H^{-1} * y
        let mut hy = Array1::zeros(n);
        for i in 0..n {
            for j in 0..n {
                hy[i] += self.hessian_inv[[i, j]] * y[j];
            }
        }

        let yhy = y.dot(&hy);

        // Expanded update:
        // H^{-1} += (ρ²(y^T H^{-1} y) + ρ) ss^T - ρ(H^{-1}y s^T + s (H^{-1}y)^T)
        for i in 0..n {
            for j in 0..n {
                self.hessian_inv[[i, j]] += (rho * rho * yhy + rho) * s[i] * s[j]
                    - rho * (hy[i] * s[j] + s[i] * hy[j]);
            }
        }

        Ok(())
    }

    /// Update using Sherman-Morrison formula for rank-1 updates
    fn update_sherman_morrison(&mut self, u: &ArrayView1<f64>, v: &ArrayView1<f64>) -> Result<()> {
        let n = u.len();

        // Compute H^{-1} * u
        let mut hu = Array1::zeros(n);
        for i in 0..n {
            for j in 0..n {
                hu[i] += self.hessian_inv[[i, j]] * u[j];
            }
        }

        let vhu = v.dot(&hu);
        if (1.0 + vhu).abs() < 1e-12 {
            return Ok(()); // Skip update if denominator too small
        }

        // Compute v^T H^{-1} before modifying the matrix so the update uses the old entries
        let mut vh = Array1::zeros(n);
        for j in 0..n {
            for k in 0..n {
                vh[j] += v[k] * self.hessian_inv[[k, j]];
            }
        }

        // Sherman-Morrison update: H^{-1} = H^{-1} - (H^{-1}uv^TH^{-1})/(1 + v^TH^{-1}u)
        let scale = 1.0 / (1.0 + vhu);
        for i in 0..n {
            for j in 0..n {
                self.hessian_inv[[i, j]] -= scale * hu[i] * vh[j];
            }
        }

        Ok(())
    }

    /// Compute Newton direction based on method type
    fn compute_newton_direction(&mut self, gradient: &ArrayView1<f64>) -> Result<Array1<f64>> {
        match self.method {
            IncrementalNewtonMethod::Exact => {
                // Use exact Hessian if available from objective function
                if let Some(data_point) = self.get_current_data_point() {
                    if let Some(hessian) = T::hessian(&self.parameters.view(), &data_point) {
                        // Add regularization for numerical stability
                        let mut reg_hessian = hessian;
                        for i in 0..reg_hessian.nrows() {
                            reg_hessian[[i, i]] += self.regularization;
                        }

                        match solve(&reg_hessian.view(), &(-gradient).view(), None) {
                            Ok(direction) => return Ok(direction),
                            Err(_) => {
                                // Fall back to inverse Hessian approximation
                            }
                        }
                    }
                }

                // Fall back to using current inverse Hessian approximation
                Ok(self.hessian_inv.dot(&(-gradient)))
            }
            IncrementalNewtonMethod::BFGS => Ok(self.hessian_inv.dot(&(-gradient))),
            IncrementalNewtonMethod::LBFGS(_) => Ok(self.lbfgs_history.compute_direction(gradient)),
            IncrementalNewtonMethod::ShermanMorrison => Ok(self.hessian_inv.dot(&(-gradient))),
        }
    }

    /// Simple line search to determine step size
    fn line_search(
        &self,
        direction: &ArrayView1<f64>,
        gradient: &ArrayView1<f64>,
        data_point: &StreamingDataPoint,
    ) -> f64 {
        let mut alpha = self.step_size;
        let c1 = 1e-4; // Armijo condition parameter
        let rho = 0.5; // Backtracking factor

        let current_f = self.objective.evaluate(&self.parameters.view(), data_point);
        let directional_derivative = gradient.dot(direction);

        for _ in 0..20 {
            // Maximum backtracking steps
            let trial_params = &self.parameters + &(alpha * direction);
            let trial_f = self.objective.evaluate(&trial_params.view(), data_point);

            // Armijo condition
            if trial_f <= current_f + c1 * alpha * directional_derivative {
                return alpha;
            }

            alpha *= rho;
        }

        alpha.max(1e-8) // Minimum step size
    }

    /// Placeholder for getting current data point (would be passed in real implementation)
    fn get_current_data_point(&self) -> Option<StreamingDataPoint> {
        // In practice, this would be provided by the calling context
        None
    }
}

impl<T: StreamingObjective + Clone> StreamingOptimizer for IncrementalNewton<T> {
    fn update(&mut self, datapoint: &StreamingDataPoint) -> Result<()> {
        let start_time = std::time::Instant::now();

        // Compute current gradient
        let gradient = self.objective.gradient(&self.parameters.view(), datapoint);

        // Compute Newton direction
        let direction = self.compute_newton_direction(&gradient.view())?;

        // Perform line search
        let alpha = self.line_search(&direction.view(), &gradient.view(), datapoint);

        // Update parameters
        let old_parameters = self.parameters.clone();
        let step = &direction * alpha;
        self.parameters = &self.parameters + &step;

        // Update second-order information
        if let Some(prev_grad) = &self.prev_gradient {
            let s = &step;
            let y = &gradient - prev_grad;

            match self.method {
                IncrementalNewtonMethod::BFGS => {
                    self.update_bfgs(&s.view(), &y.view())?;
                }
                IncrementalNewtonMethod::LBFGS(_) => {
                    self.lbfgs_history.add_pair(s.clone(), y.clone());
                }
                IncrementalNewtonMethod::ShermanMorrison => {
                    // Use gradient difference as rank-1 update
                    if y.dot(&y) > 1e-12 {
                        self.update_sherman_morrison(&s.view(), &y.view())?;
                    }
                }
                IncrementalNewtonMethod::Exact => {
                    // For exact method, we update the inverse Hessian approximation
                    // using BFGS as a fallback
                    self.update_bfgs(&s.view(), &y.view())?;
                }
            }
        }

        // Store current gradient for next iteration
        self.prev_gradient = Some(gradient.clone());

        // Update step size using simple adaptation
        let step_norm = step.mapv(|x| x * x).sum().sqrt();
        if step_norm > 0.0 {
            let param_norm = self.parameters.mapv(|x| x * x).sum().sqrt().max(1.0);
            let relative_step = step_norm / param_norm;

            if relative_step > 0.1 {
                self.step_size *= 0.8; // Reduce step size if too large
            } else if relative_step < 0.01 {
                self.step_size *= 1.2; // Increase step size if too small
            }
            self.step_size = self.step_size.max(1e-8).min(10.0);
        }

        // Check convergence
        self.stats.converged = utils::check_convergence(
            &old_parameters.view(),
            &self.parameters.view(),
            self.config.tolerance,
        );

        // Update statistics
        let loss = self.objective.evaluate(&self.parameters.view(), datapoint);
        self.stats.points_processed += 1;
        self.stats.updates_performed += 1;
        self.stats.current_loss = loss;
        self.stats.average_loss = utils::ewma_update(self.stats.average_loss, loss, 0.01);

        self.stats.processing_time_ms += start_time.elapsed().as_secs_f64() * 1000.0;

        Ok(())
    }

    fn parameters(&self) -> &Array1<f64> {
        &self.parameters
    }

    fn stats(&self) -> &StreamingStats {
        &self.stats
    }

    fn reset(&mut self) {
        let n = self.parameters.len();
        self.hessian_inv = Array2::eye(n);
        self.prev_gradient = None;
        self.lbfgs_history.reset();
        self.step_size = 1.0;
        self.stats = StreamingStats::default();
    }
}

/// Convenience function for incremental Newton with BFGS
#[allow(dead_code)]
pub fn incremental_bfgs<T: StreamingObjective>(
    initial_parameters: Array1<f64>,
    objective: T,
    config: Option<StreamingConfig>,
) -> IncrementalNewton<T> {
    let config = config.unwrap_or_default();
    IncrementalNewton::new(
        initial_parameters,
        objective,
        config,
        IncrementalNewtonMethod::BFGS,
    )
}

/// Convenience function for incremental L-BFGS
#[allow(dead_code)]
pub fn incremental_lbfgs<T: StreamingObjective>(
    initial_parameters: Array1<f64>,
    objective: T,
    memory_size: usize,
    config: Option<StreamingConfig>,
) -> IncrementalNewton<T> {
    let config = config.unwrap_or_default();
    IncrementalNewton::new(
        initial_parameters,
        objective,
        config,
        IncrementalNewtonMethod::LBFGS(memory_size),
    )
}

/// Convenience function for L-BFGS linear regression
#[allow(dead_code)]
pub fn incremental_lbfgs_linear_regression(
    n_features: usize,
    memory_size: Option<usize>,
    config: Option<StreamingConfig>,
) -> IncrementalNewton<super::LinearRegressionObjective> {
    let config = config.unwrap_or_default();
    let memory = memory_size.unwrap_or(10);
    let initial_params = Array1::zeros(n_features);
    let objective = super::LinearRegressionObjective;

    IncrementalNewton::new(
        initial_params,
        objective,
        config,
        IncrementalNewtonMethod::LBFGS(memory),
    )
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::streaming::{LinearRegressionObjective, StreamingDataPoint};

    #[test]
    fn test_incremental_newton_creation() {
        let params = Array1::from(vec![0.0, 0.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();

        let optimizer = IncrementalNewton::new(
            params.clone(),
            objective,
            config,
            IncrementalNewtonMethod::BFGS,
        );
        assert_eq!(optimizer.parameters(), &params);
        assert!(matches!(optimizer.method, IncrementalNewtonMethod::BFGS));
    }

    #[test]
    fn test_lbfgs_history() {
        let mut history = LBFGSHistory::new(3);

        let s1 = Array1::from(vec![1.0, 0.0]);
        let y1 = Array1::from(vec![0.5, 0.0]);
        history.add_pair(s1, y1);

        assert_eq!(history.stored_count, 1);

        let gradient = Array1::from(vec![1.0, 1.0]);
        let direction = history.compute_direction(&gradient.view());

        // Direction should be approximately -gradient scaled
        assert!(direction[0] < 0.0);
        assert!(direction[1] < 0.0);
    }
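
    // A small additional check (sketch): with memory size 2, adding a third
    // (s, y) pair should overwrite the oldest entry, and the two-loop recursion
    // should still produce a descent direction for the given gradient.
    #[test]
    fn test_lbfgs_history_wraparound() {
        let mut history = LBFGSHistory::new(2);

        history.add_pair(Array1::from(vec![1.0, 0.0]), Array1::from(vec![0.5, 0.0]));
        history.add_pair(Array1::from(vec![0.0, 1.0]), Array1::from(vec![0.0, 0.5]));
        history.add_pair(Array1::from(vec![1.0, 1.0]), Array1::from(vec![0.5, 0.5]));

        // Buffer capacity is respected after wrap-around
        assert_eq!(history.stored_count, 2);
        assert_eq!(history.s_vectors.len(), 2);

        let gradient = Array1::from(vec![1.0, 1.0]);
        let direction = history.compute_direction(&gradient.view());

        // A positive-definite inverse Hessian approximation yields a descent direction
        assert!(direction.dot(&gradient) < 0.0);
    }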

    #[test]
    fn test_incremental_bfgs_update() {
        let mut optimizer = incremental_bfgs(
            Array1::from(vec![0.0, 0.0]),
            LinearRegressionObjective,
            None,
        );

        let features = Array1::from(vec![1.0, 2.0]);
        let target = 3.0;
        let point = StreamingDataPoint::new(features, target);

        assert!(optimizer.update(&point).is_ok());
        assert_eq!(optimizer.stats().points_processed, 1);
    }

    #[test]
    fn test_incremental_lbfgs_update() {
        let mut optimizer = incremental_lbfgs_linear_regression(2, Some(5), None);

        let features = Array1::from(vec![1.0, 2.0]);
        let target = 3.0;
        let point = StreamingDataPoint::new(features, target);

        assert!(optimizer.update(&point).is_ok());
        assert_eq!(optimizer.stats().points_processed, 1);
    }

    #[test]
    fn test_bfgs_hessian_update() {
        let params = Array1::from(vec![1.0, 1.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();

        let mut optimizer =
            IncrementalNewton::new(params, objective, config, IncrementalNewtonMethod::BFGS);

        let s = Array1::from(vec![0.1, 0.2]);
        let y = Array1::from(vec![0.05, 0.1]);

        let original_hessian = optimizer.hessian_inv.clone();
        optimizer.update_bfgs(&s.view(), &y.view()).unwrap();

        // Hessian should have changed
        assert!(&optimizer.hessian_inv != &original_hessian);
    }
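
    // A small additional check (sketch): starting from the identity, a single
    // BFGS inverse update should satisfy the secant equation H^{-1} y = s.
    #[test]
    fn test_bfgs_secant_equation() {
        let params = Array1::from(vec![1.0, 1.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();

        let mut optimizer =
            IncrementalNewton::new(params, objective, config, IncrementalNewtonMethod::BFGS);

        let s = Array1::from(vec![0.1, 0.2]);
        let y = Array1::from(vec![0.05, 0.1]);
        optimizer.update_bfgs(&s.view(), &y.view()).unwrap();

        // Secant condition: the updated inverse Hessian maps y back to s
        let hy = optimizer.hessian_inv.dot(&y);
        for i in 0..2 {
            assert!((hy[i] - s[i]).abs() < 1e-10);
        }
    }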

    #[test]
    fn test_sherman_morrison_update() {
        let params = Array1::from(vec![1.0, 1.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();

        let mut optimizer = IncrementalNewton::new(
            params,
            objective,
            config,
            IncrementalNewtonMethod::ShermanMorrison,
        );

        let u = Array1::from(vec![0.1, 0.2]);
        let v = Array1::from(vec![0.3, 0.4]);

        let original_hessian = optimizer.hessian_inv.clone();
        optimizer
            .update_sherman_morrison(&u.view(), &v.view())
            .unwrap();

        // Hessian should have changed
        assert!(&optimizer.hessian_inv != &original_hessian);
    }
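
    // A small additional check (sketch): starting from H^{-1} = I (so H = I),
    // a Sherman-Morrison update should yield the exact inverse of (I + u v^T),
    // i.e. hessian_inv * (I + u v^T) should be the identity.
    #[test]
    fn test_sherman_morrison_inverse_identity() {
        let params = Array1::from(vec![1.0, 1.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();

        let mut optimizer = IncrementalNewton::new(
            params,
            objective,
            config,
            IncrementalNewtonMethod::ShermanMorrison,
        );

        let u = Array1::from(vec![0.1, 0.2]);
        let v = Array1::from(vec![0.3, 0.4]);
        optimizer
            .update_sherman_morrison(&u.view(), &v.view())
            .unwrap();

        // Build I + u v^T and check that the updated inverse multiplies it to I
        let n = 2;
        let mut m = Array2::eye(n);
        for i in 0..n {
            for j in 0..n {
                m[[i, j]] += u[i] * v[j];
            }
        }
        let product = optimizer.hessian_inv.dot(&m);
        for i in 0..n {
            for j in 0..n {
                let expected = if i == j { 1.0 } else { 0.0 };
                assert!((product[[i, j]] - expected).abs() < 1e-10);
            }
        }
    }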

    #[test]
    fn test_convergence_with_multiple_updates() {
        let mut config = StreamingConfig::default();
        config.tolerance = 1e-3;

        let mut optimizer = incremental_lbfgs_linear_regression(2, Some(5), Some(config));

        // Generate consistent data for y = 2*x1 + 3*x2
        let data_points = vec![
            StreamingDataPoint::new(Array1::from(vec![1.0, 0.0]), 2.0),
            StreamingDataPoint::new(Array1::from(vec![0.0, 1.0]), 3.0),
            StreamingDataPoint::new(Array1::from(vec![1.0, 1.0]), 5.0),
        ];

        for _epoch in 0..20 {
            for point in &data_points {
                optimizer.update(point).unwrap();
                if optimizer.converged() {
                    break;
                }
            }
            if optimizer.converged() {
                break;
            }
        }

        // Should make some progress toward the solution
        assert!(optimizer.stats().updates_performed > 0);
        assert!(optimizer.parameters().iter().all(|p| p.is_finite()));
    }
}
602}