use super::{
    utils, StreamingConfig, StreamingDataPoint, StreamingObjective, StreamingOptimizer,
    StreamingStats,
};
use crate::error::OptimizeError;
use ndarray::{Array1, Array2, ArrayView1};
use scirs2_linalg::solve;

type Result<T> = std::result::Result<T, OptimizeError>;

/// Strategy used to form the (approximate) Newton step.
#[derive(Debug, Clone, Copy)]
pub enum IncrementalNewtonMethod {
    /// Solve with the exact per-sample Hessian when the objective provides one.
    Exact,
    /// Full-matrix BFGS approximation of the inverse Hessian.
    BFGS,
    /// Limited-memory BFGS with the given history size.
    LBFGS(usize),
    /// Rank-one Sherman-Morrison updates of the inverse Hessian.
    ShermanMorrison,
}

/// Streaming Newton-type optimizer that refines a curvature model online.
#[derive(Debug, Clone)]
pub struct IncrementalNewton<T: StreamingObjective> {
    /// Current parameter estimate.
    parameters: Array1<f64>,
    /// Streaming objective being minimized.
    objective: T,
    /// Streaming configuration (tolerance, etc.).
    config: StreamingConfig,
    /// Running statistics for the stream.
    stats: StreamingStats,
    /// Newton variant in use.
    method: IncrementalNewtonMethod,
    /// Inverse Hessian approximation (BFGS and Sherman-Morrison variants).
    hessian_inv: Array2<f64>,
    /// Gradient from the previous update, used to form curvature pairs.
    prev_gradient: Option<Array1<f64>>,
    /// Circular history of curvature pairs for L-BFGS.
    lbfgs_history: LBFGSHistory,
    /// Diagonal regularization added before exact Hessian solves.
    regularization: f64,
    /// Current trial step size for the line search.
    step_size: f64,
}

/// Circular buffer of curvature pairs for the L-BFGS two-loop recursion.
#[derive(Debug, Clone)]
struct LBFGSHistory {
    /// Parameter differences s_k = x_{k+1} - x_k.
    s_vectors: Vec<Array1<f64>>,
    /// Gradient differences y_k = g_{k+1} - g_k.
    y_vectors: Vec<Array1<f64>>,
    /// Precomputed rho_k = 1 / (s_k . y_k).
    rho_values: Vec<f64>,
    /// Maximum number of stored pairs.
    memory_size: usize,
    /// Next slot to overwrite in the circular buffer.
    current_pos: usize,
    /// Number of pairs currently stored (at most `memory_size`).
    stored_count: usize,
}

impl LBFGSHistory {
    fn new(memory_size: usize) -> Self {
        Self {
            s_vectors: Vec::with_capacity(memory_size),
            y_vectors: Vec::with_capacity(memory_size),
            rho_values: Vec::with_capacity(memory_size),
            memory_size,
            current_pos: 0,
            stored_count: 0,
        }
    }

    fn add_pair(&mut self, s: Array1<f64>, y: Array1<f64>) {
        // Reject pairs that violate the curvature condition s.y > 0; a tiny
        // or negative product would make rho explode and corrupt the model.
        let sy = s.dot(&y);
        if sy <= 1e-12 {
            return;
        }
        let rho = 1.0 / sy;

        if self.stored_count < self.memory_size {
            self.s_vectors.push(s);
            self.y_vectors.push(y);
            self.rho_values.push(rho);
            self.stored_count += 1;
        } else {
            self.s_vectors[self.current_pos] = s;
            self.y_vectors[self.current_pos] = y;
            self.rho_values[self.current_pos] = rho;
        }

        self.current_pos = (self.current_pos + 1) % self.memory_size;
    }

    /// Two-loop recursion: applies the implicit L-BFGS inverse Hessian to
    /// the gradient and returns the quasi-Newton direction -H*g.
    fn compute_direction(&self, gradient: &ArrayView1<f64>) -> Array1<f64> {
        if self.stored_count == 0 {
            return -gradient.to_owned();
        }

        let mut q = gradient.to_owned();
        let mut alpha = vec![0.0; self.stored_count];

        // First loop: walk the pairs from newest (i = 0) to oldest.
        for i in 0..self.stored_count {
            let idx = (self.current_pos + self.memory_size - 1 - i) % self.memory_size;
            alpha[i] = self.rho_values[idx] * self.s_vectors[idx].dot(&q);
            q = &q - &(alpha[i] * &self.y_vectors[idx]);
        }

        // Scale by gamma = (s.y)/(y.y) from the newest pair as the initial
        // inverse-Hessian guess. `last()` would be wrong once the circular
        // buffer wraps, so index the newest slot explicitly.
        let newest = (self.current_pos + self.memory_size - 1) % self.memory_size;
        let gamma = self.s_vectors[newest].dot(&self.y_vectors[newest])
            / self.y_vectors[newest].dot(&self.y_vectors[newest]);
        if gamma > 0.0 && gamma.is_finite() {
            q = q * gamma;
        }

        // Second loop: walk the pairs back from oldest to newest.
        for i in (0..self.stored_count).rev() {
            let idx = (self.current_pos + self.memory_size - 1 - i) % self.memory_size;
            let beta = self.rho_values[idx] * self.y_vectors[idx].dot(&q);
            q = &q + &((alpha[i] - beta) * &self.s_vectors[idx]);
        }

        -q
    }

    fn reset(&mut self) {
        self.s_vectors.clear();
        self.y_vectors.clear();
        self.rho_values.clear();
        self.current_pos = 0;
        self.stored_count = 0;
    }
}

impl<T: StreamingObjective> IncrementalNewton<T> {
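    /// Creates a new incremental Newton optimizer from an initial parameter
    /// vector, a streaming objective, a configuration, and a method choice.
    ///
    /// The inverse-Hessian estimate starts at the identity, and the L-BFGS
    /// history buffer is sized from the method (defaulting to 10 pairs for
    /// the other variants, which do not use it).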
    pub fn new(
        initial_parameters: Array1<f64>,
        objective: T,
        config: StreamingConfig,
        method: IncrementalNewtonMethod,
    ) -> Self {
        let n_params = initial_parameters.len();
        let memory_size = match method {
            IncrementalNewtonMethod::LBFGS(m) => m,
            _ => 10,
        };

        Self {
            parameters: initial_parameters,
            objective,
            config,
            stats: StreamingStats::default(),
            method,
            hessian_inv: Array2::eye(n_params),
            prev_gradient: None,
            lbfgs_history: LBFGSHistory::new(memory_size),
            regularization: 1e-8,
            step_size: 1.0,
        }
    }

    /// Rank-two BFGS update of the inverse Hessian approximation:
    /// H <- H + (rho + rho^2 * y'Hy) * s s' - rho * (H y s' + s y'H),
    /// where rho = 1/(s.y).
    fn update_bfgs(&mut self, s: &ArrayView1<f64>, y: &ArrayView1<f64>) -> Result<()> {
        let n = s.len();
        let sy = s.dot(y);

        // Skip the update when the curvature condition s.y > 0 fails, since
        // updating anyway could make the approximation indefinite.
        if sy <= 1e-12 {
            return Ok(());
        }

        let rho = 1.0 / sy;

        // hy = H*y; H stays symmetric under this update, so y'H = (H y)'.
        let hy = self.hessian_inv.dot(y);
        let yhy = y.dot(&hy);

        for i in 0..n {
            for j in 0..n {
                self.hessian_inv[[i, j]] += (rho + rho * rho * yhy) * s[i] * s[j]
                    - rho * (hy[i] * s[j] + s[i] * hy[j]);
            }
        }

        Ok(())
    }

    /// Sherman-Morrison rank-one update of the stored inverse:
    /// (A + u v')^{-1} = H - (H u)(v'H) / (1 + v'H u), with H = A^{-1}.
    fn update_sherman_morrison(&mut self, u: &ArrayView1<f64>, v: &ArrayView1<f64>) -> Result<()> {
        let n = u.len();

        // Compute H*u and v'H once up front (H is not assumed symmetric).
        let hu = self.hessian_inv.dot(u);
        let vh = self.hessian_inv.t().dot(v);

        let vhu = v.dot(&hu);
        // A denominator near zero means the updated matrix is (nearly)
        // singular; skip the update rather than divide by ~0.
        if (1.0 + vhu).abs() < 1e-12 {
            return Ok(());
        }

        let scale = 1.0 / (1.0 + vhu);
        for i in 0..n {
            for j in 0..n {
                self.hessian_inv[[i, j]] -= scale * hu[i] * vh[j];
            }
        }

        Ok(())
    }

    /// Computes the search direction for the configured method.
    fn compute_newton_direction(&mut self, gradient: &ArrayView1<f64>) -> Result<Array1<f64>> {
        match self.method {
            IncrementalNewtonMethod::Exact => {
                if let Some(data_point) = self.get_current_data_point() {
                    if let Some(hessian) = T::hessian(&self.parameters.view(), &data_point) {
                        // Regularize the diagonal so the solve is well posed.
                        let mut reg_hessian = hessian;
                        for i in 0..reg_hessian.nrows() {
                            reg_hessian[[i, i]] += self.regularization;
                        }

                        match solve(&reg_hessian.view(), &(-gradient).view(), None) {
                            Ok(direction) => return Ok(direction),
                            Err(_) => {
                                // Singular or ill-conditioned Hessian: fall
                                // through to the quasi-Newton fallback below.
                            }
                        }
                    }
                }

                // Fall back to the running inverse-Hessian approximation.
                Ok(self.hessian_inv.dot(&(-gradient)))
            }
            IncrementalNewtonMethod::BFGS => Ok(self.hessian_inv.dot(&(-gradient))),
            IncrementalNewtonMethod::LBFGS(_) => Ok(self.lbfgs_history.compute_direction(gradient)),
            IncrementalNewtonMethod::ShermanMorrison => Ok(self.hessian_inv.dot(&(-gradient))),
        }
    }

    /// Backtracking (Armijo) line search along `direction`.
    fn line_search(
        &self,
        direction: &ArrayView1<f64>,
        gradient: &ArrayView1<f64>,
        data_point: &StreamingDataPoint,
    ) -> f64 {
        let mut alpha = self.step_size;
        let c1 = 1e-4; // sufficient-decrease constant
        let rho = 0.5; // backtracking factor
        let current_f = self.objective.evaluate(&self.parameters.view(), data_point);
        let directional_derivative = gradient.dot(direction);

        for _ in 0..20 {
            let trial_params = &self.parameters + &(alpha * direction);
            let trial_f = self.objective.evaluate(&trial_params.view(), data_point);

            if trial_f <= current_f + c1 * alpha * directional_derivative {
                return alpha;
            }

            alpha *= rho;
        }

        // No sufficient decrease found: return a small but nonzero step.
        alpha.max(1e-8)
    }

    /// Placeholder: the streaming optimizer does not buffer data points, so
    /// there is no "current" point to revisit and the exact-Hessian path in
    /// `compute_newton_direction` always falls back to the quasi-Newton model.
    fn get_current_data_point(&self) -> Option<StreamingDataPoint> {
        None
    }
}

impl<T: StreamingObjective + Clone> StreamingOptimizer for IncrementalNewton<T> {
    fn update(&mut self, datapoint: &StreamingDataPoint) -> Result<()> {
        let start_time = std::time::Instant::now();

        let gradient = self.objective.gradient(&self.parameters.view(), datapoint);

        let direction = self.compute_newton_direction(&gradient.view())?;

        let alpha = self.line_search(&direction.view(), &gradient.view(), datapoint);

        let old_parameters = self.parameters.clone();
        let step = &direction * alpha;
        self.parameters = &self.parameters + &step;

        // Form a curvature pair (s, y) and refresh the Hessian model.
        if let Some(prev_grad) = &self.prev_gradient {
            let s = &step;
            let y = &gradient - prev_grad;

            match self.method {
                IncrementalNewtonMethod::BFGS => {
                    self.update_bfgs(&s.view(), &y.view())?;
                }
                IncrementalNewtonMethod::LBFGS(_) => {
                    self.lbfgs_history.add_pair(s.clone(), y.clone());
                }
                IncrementalNewtonMethod::ShermanMorrison => {
                    if y.dot(&y) > 1e-12 {
                        self.update_sherman_morrison(&s.view(), &y.view())?;
                    }
                }
                IncrementalNewtonMethod::Exact => {
                    // Keep the quasi-Newton fallback model current in case
                    // the exact solve is unavailable or fails.
                    self.update_bfgs(&s.view(), &y.view())?;
                }
            }
        }

        self.prev_gradient = Some(gradient);

        // Adapt the trial step size from the relative size of the step taken.
        let step_norm = step.dot(&step).sqrt();
        if step_norm > 0.0 {
            let param_norm = self.parameters.dot(&self.parameters).sqrt().max(1.0);
            let relative_step = step_norm / param_norm;

            if relative_step > 0.1 {
                self.step_size *= 0.8; // large step: be more conservative
            } else if relative_step < 0.01 {
                self.step_size *= 1.2; // tiny step: allow longer trials
            }
            self.step_size = self.step_size.clamp(1e-8, 10.0);
        }

        self.stats.converged = utils::check_convergence(
            &old_parameters.view(),
            &self.parameters.view(),
            self.config.tolerance,
        );

        let loss = self.objective.evaluate(&self.parameters.view(), datapoint);
        self.stats.points_processed += 1;
        self.stats.updates_performed += 1;
        self.stats.current_loss = loss;
        self.stats.average_loss = utils::ewma_update(self.stats.average_loss, loss, 0.01);

        self.stats.processing_time_ms += start_time.elapsed().as_secs_f64() * 1000.0;

        Ok(())
    }

    fn parameters(&self) -> &Array1<f64> {
        &self.parameters
    }

    fn stats(&self) -> &StreamingStats {
        &self.stats
    }

    fn reset(&mut self) {
        let n = self.parameters.len();
        self.hessian_inv = Array2::eye(n);
        self.prev_gradient = None;
        self.lbfgs_history.reset();
        self.step_size = 1.0;
        self.stats = StreamingStats::default();
    }
}
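
/// Convenience constructor for an incremental BFGS optimizer with an
/// optional streaming configuration (defaults apply when `None`).
///
/// A minimal usage sketch, assuming the `LinearRegressionObjective` and
/// `StreamingDataPoint` types from this crate's `streaming` module:
///
/// ```ignore
/// let mut opt = incremental_bfgs(Array1::zeros(2), LinearRegressionObjective, None);
/// let point = StreamingDataPoint::new(Array1::from(vec![1.0, 2.0]), 3.0);
/// opt.update(&point)?;
/// ```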
#[allow(dead_code)]
pub fn incremental_bfgs<T: StreamingObjective>(
    initial_parameters: Array1<f64>,
    objective: T,
    config: Option<StreamingConfig>,
) -> IncrementalNewton<T> {
    let config = config.unwrap_or_default();
    IncrementalNewton::new(
        initial_parameters,
        objective,
        config,
        IncrementalNewtonMethod::BFGS,
    )
}
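
/// Convenience constructor for an incremental L-BFGS optimizer with the
/// given history size.
///
/// A minimal sketch, again assuming this crate's `LinearRegressionObjective`:
///
/// ```ignore
/// // Keep only the last 5 curvature pairs instead of a full n x n matrix.
/// let mut opt = incremental_lbfgs(Array1::zeros(2), LinearRegressionObjective, 5, None);
/// opt.update(&StreamingDataPoint::new(Array1::from(vec![1.0, 2.0]), 3.0))?;
/// ```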
#[allow(dead_code)]
pub fn incremental_lbfgs<T: StreamingObjective>(
    initial_parameters: Array1<f64>,
    objective: T,
    memory_size: usize,
    config: Option<StreamingConfig>,
) -> IncrementalNewton<T> {
    let config = config.unwrap_or_default();
    IncrementalNewton::new(
        initial_parameters,
        objective,
        config,
        IncrementalNewtonMethod::LBFGS(memory_size),
    )
}
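
/// Builds an incremental L-BFGS optimizer preconfigured for streaming linear
/// regression with `n_features` coefficients, all starting at zero.
///
/// A minimal sketch; `stream` is a stand-in for any source of
/// `(Array1<f64>, f64)` feature/target pairs:
///
/// ```ignore
/// let mut opt = incremental_lbfgs_linear_regression(2, Some(10), None);
/// for (x, y) in stream {
///     opt.update(&StreamingDataPoint::new(x, y))?;
/// }
/// println!("coefficients: {:?}", opt.parameters());
/// ```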
#[allow(dead_code)]
pub fn incremental_lbfgs_linear_regression(
    n_features: usize,
    memory_size: Option<usize>,
    config: Option<StreamingConfig>,
) -> IncrementalNewton<super::LinearRegressionObjective> {
    let config = config.unwrap_or_default();
    let memory = memory_size.unwrap_or(10);
    let initial_params = Array1::zeros(n_features);
    let objective = super::LinearRegressionObjective;

    IncrementalNewton::new(
        initial_params,
        objective,
        config,
        IncrementalNewtonMethod::LBFGS(memory),
    )
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::streaming::{LinearRegressionObjective, StreamingDataPoint};

    #[test]
    fn test_incremental_newton_creation() {
        let params = Array1::from(vec![0.0, 0.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();

        let optimizer = IncrementalNewton::new(
            params.clone(),
            objective,
            config,
            IncrementalNewtonMethod::BFGS,
        );
        assert_eq!(optimizer.parameters(), &params);
        assert!(matches!(optimizer.method, IncrementalNewtonMethod::BFGS));
    }

    #[test]
    fn test_lbfgs_history() {
        let mut history = LBFGSHistory::new(3);

        let s1 = Array1::from(vec![1.0, 0.0]);
        let y1 = Array1::from(vec![0.5, 0.0]);
        history.add_pair(s1, y1);

        assert_eq!(history.stored_count, 1);

        let gradient = Array1::from(vec![1.0, 1.0]);
        let direction = history.compute_direction(&gradient.view());

        // The quasi-Newton direction should oppose the gradient.
        assert!(direction[0] < 0.0);
        assert!(direction[1] < 0.0);
    }
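
    // The history is a circular buffer: with memory_size = 2, a third pair
    // overwrites the oldest slot and the stored count stays capped.
    #[test]
    fn test_lbfgs_history_wraparound() {
        let mut history = LBFGSHistory::new(2);
        for k in 1..=3 {
            let s = Array1::from(vec![k as f64, 0.0]);
            let y = Array1::from(vec![k as f64 * 0.5, 0.0]);
            history.add_pair(s, y);
        }
        assert_eq!(history.stored_count, 2);
        assert!(history.current_pos < 2);
    }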

    #[test]
    fn test_incremental_bfgs_update() {
        let mut optimizer = incremental_bfgs(
            Array1::from(vec![0.0, 0.0]),
            LinearRegressionObjective,
            None,
        );

        let features = Array1::from(vec![1.0, 2.0]);
        let target = 3.0;
        let point = StreamingDataPoint::new(features, target);

        assert!(optimizer.update(&point).is_ok());
        assert_eq!(optimizer.stats().points_processed, 1);
    }

    #[test]
    fn test_incremental_lbfgs_update() {
        let mut optimizer = incremental_lbfgs_linear_regression(2, Some(5), None);

        let features = Array1::from(vec![1.0, 2.0]);
        let target = 3.0;
        let point = StreamingDataPoint::new(features, target);

        assert!(optimizer.update(&point).is_ok());
        assert_eq!(optimizer.stats().points_processed, 1);
    }
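
    // The backtracking line search should return a strictly positive step no
    // larger than the optimizer's current trial step size.
    #[test]
    fn test_line_search_bounds() {
        let optimizer = IncrementalNewton::new(
            Array1::from(vec![0.0, 0.0]),
            LinearRegressionObjective,
            StreamingConfig::default(),
            IncrementalNewtonMethod::BFGS,
        );
        let point = StreamingDataPoint::new(Array1::from(vec![1.0, 2.0]), 3.0);
        let gradient = optimizer
            .objective
            .gradient(&optimizer.parameters.view(), &point);
        let direction = -&gradient; // steepest descent

        let alpha = optimizer.line_search(&direction.view(), &gradient.view(), &point);
        assert!(alpha > 0.0);
        assert!(alpha <= optimizer.step_size);
    }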

    #[test]
    fn test_bfgs_hessian_update() {
        let params = Array1::from(vec![1.0, 1.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();

        let mut optimizer =
            IncrementalNewton::new(params, objective, config, IncrementalNewtonMethod::BFGS);

        let s = Array1::from(vec![0.1, 0.2]);
        let y = Array1::from(vec![0.05, 0.1]);

        let original_hessian = optimizer.hessian_inv.clone();
        optimizer.update_bfgs(&s.view(), &y.view()).unwrap();

        assert_ne!(optimizer.hessian_inv, original_hessian);
    }
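
    // The BFGS inverse update satisfies the secant condition H_new * y = s
    // exactly (a standard identity for this update formula), which pins down
    // the coefficients used in `update_bfgs`.
    #[test]
    fn test_bfgs_secant_condition() {
        let mut optimizer = incremental_bfgs(
            Array1::from(vec![0.0, 0.0]),
            LinearRegressionObjective,
            None,
        );

        let s = Array1::from(vec![0.1, 0.2]);
        let y = Array1::from(vec![0.05, 0.1]);
        optimizer.update_bfgs(&s.view(), &y.view()).unwrap();

        let hy = optimizer.hessian_inv.dot(&y);
        for i in 0..2 {
            assert!((hy[i] - s[i]).abs() < 1e-10);
        }
    }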

    #[test]
    fn test_sherman_morrison_update() {
        let params = Array1::from(vec![1.0, 1.0]);
        let objective = LinearRegressionObjective;
        let config = StreamingConfig::default();

        let mut optimizer = IncrementalNewton::new(
            params,
            objective,
            config,
            IncrementalNewtonMethod::ShermanMorrison,
        );

        let u = Array1::from(vec![0.1, 0.2]);
        let v = Array1::from(vec![0.3, 0.4]);

        let original_hessian = optimizer.hessian_inv.clone();
        optimizer
            .update_sherman_morrison(&u.view(), &v.view())
            .unwrap();

        assert_ne!(optimizer.hessian_inv, original_hessian);
    }
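
    // Starting from H = I, the Sherman-Morrison step computes the exact
    // inverse of (I + u v^T), so multiplying the two should recover the
    // identity matrix.
    #[test]
    fn test_sherman_morrison_recovers_inverse() {
        let mut optimizer = IncrementalNewton::new(
            Array1::from(vec![0.0, 0.0]),
            LinearRegressionObjective,
            StreamingConfig::default(),
            IncrementalNewtonMethod::ShermanMorrison,
        );

        let u = Array1::from(vec![0.1, 0.2]);
        let v = Array1::from(vec![0.3, 0.4]);
        optimizer
            .update_sherman_morrison(&u.view(), &v.view())
            .unwrap();

        // Build I + u v^T and check H * (I + u v^T) ~ I.
        let mut m = Array2::eye(2);
        for i in 0..2 {
            for j in 0..2 {
                m[[i, j]] += u[i] * v[j];
            }
        }
        let product = optimizer.hessian_inv.dot(&m);
        for i in 0..2 {
            for j in 0..2 {
                let expected = if i == j { 1.0 } else { 0.0 };
                assert!((product[[i, j]] - expected).abs() < 1e-10);
            }
        }
    }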

    #[test]
    fn test_convergence_with_multiple_updates() {
        let mut config = StreamingConfig::default();
        config.tolerance = 1e-3;

        let mut optimizer = incremental_lbfgs_linear_regression(2, Some(5), Some(config));

        let data_points = vec![
            StreamingDataPoint::new(Array1::from(vec![1.0, 0.0]), 2.0),
            StreamingDataPoint::new(Array1::from(vec![0.0, 1.0]), 3.0),
            StreamingDataPoint::new(Array1::from(vec![1.0, 1.0]), 5.0),
        ];

        for _epoch in 0..20 {
            for point in &data_points {
                optimizer.update(point).unwrap();
                if optimizer.converged() {
                    break;
                }
            }
            if optimizer.converged() {
                break;
            }
        }

        let params = optimizer.parameters();
        assert!(params.iter().all(|p| p.is_finite()));
        assert!(optimizer.stats().updates_performed > 0);
    }
}