scirs2_optimize/streaming/
streaming_trust_region.rs1use super::{
8 utils, StreamingConfig, StreamingDataPoint, StreamingObjective, StreamingOptimizer,
9 StreamingStats,
10};
11use crate::error::OptimizeError;
12use ndarray::{Array1, Array2, ArrayView1};
13use scirs2_linalg::solve;
16type Result<T> = std::result::Result<T, OptimizeError>;
20
21#[derive(Debug, Clone)]
23pub struct StreamingTrustRegion<T: StreamingObjective> {
24 parameters: Array1<f64>,
26 objective: T,
28 config: StreamingConfig,
30 stats: StreamingStats,
32 trust_radius: f64,
34 hessian_approx: Array2<f64>,
36 gradient_accumulator: Array1<f64>,
38 gradient_count: usize,
40 success_ratio: f64,
42 prev_function_value: f64,
44}
45
46impl<T: StreamingObjective> StreamingTrustRegion<T> {
47 pub fn new(
49 initial_parameters: Array1<f64>,
50 objective: T,
51 config: StreamingConfig,
52 initial_trust_radius: f64,
53 ) -> Self {
54 let n_params = initial_parameters.len();
55 Self {
56 parameters: initial_parameters,
57 objective,
58 config,
59 stats: StreamingStats::default(),
60 trust_radius: initial_trust_radius,
61 hessian_approx: Array2::eye(n_params), gradient_accumulator: Array1::zeros(n_params),
63 gradient_count: 0,
64 success_ratio: 0.5,
65 prev_function_value: f64::INFINITY,
66 }
67 }
68
69 fn solve_trust_region_subproblem(&self, gradient: &ArrayView1<f64>) -> Result<Array1<f64>> {
71 let n = gradient.len();
72
73 let mut regularized_hessian = self.hessian_approx.clone();
75 for i in 0..n {
76 regularized_hessian[[i, i]] += self.config.regularization;
77 }
78
79 match solve(®ularized_hessian.view(), &(-gradient).view(), None) {
81 Ok(newton_step) => {
82 let step_norm = newton_step.mapv(|x| x * x).sum().sqrt();
83
84 if step_norm <= self.trust_radius {
85 Ok(newton_step)
87 } else {
88 Ok(newton_step * (self.trust_radius / step_norm))
90 }
91 }
92 Err(_) => {
93 let grad_norm = gradient.mapv(|x| x * x).sum().sqrt();
95 if grad_norm > 0.0 {
96 let cauchy_step = -(self.trust_radius / grad_norm) * gradient;
97 Ok(cauchy_step.to_owned())
98 } else {
99 Ok(Array1::zeros(n))
100 }
101 }
102 }
103 }
104
105 fn update_hessian_approximation(
107 &mut self,
108 step: &ArrayView1<f64>,
109 grad_diff: &ArrayView1<f64>,
110 ) {
111 let rho = step.dot(grad_diff);
112
113 if rho.abs() < 1e-12 {
114 return; }
116
117 self.hessian_approx *= self.config.forgetting_factor;
119
120 let n = step.len();
122 let mut outer_yy = Array2::zeros((n, n));
123 let mut hs = Array1::zeros(n);
124
125 for i in 0..n {
127 for j in 0..n {
128 hs[i] += self.hessian_approx[[i, j]] * step[j];
129 }
130 }
131
132 let shs = step.dot(&hs);
133
134 if shs > 1e-12 {
135 for i in 0..n {
137 for j in 0..n {
138 outer_yy[[i, j]] = grad_diff[i] * grad_diff[j];
139 self.hessian_approx[[i, j]] += outer_yy[[i, j]] / rho - (hs[i] * hs[j]) / shs;
140 }
141 }
142 }
143
144 let min_eigenvalue = self.estimate_min_eigenvalue();
146 if min_eigenvalue < self.config.regularization {
147 for i in 0..n {
148 self.hessian_approx[[i, i]] += self.config.regularization - min_eigenvalue;
149 }
150 }
151 }
152
153 fn estimate_min_eigenvalue(&self) -> f64 {
155 let n = self.hessian_approx.nrows();
156 let mut min_est = f64::INFINITY;
157
158 for i in 0..n {
159 let diagonal = self.hessian_approx[[i, i]];
160 let off_diagonal_sum: f64 = (0..n)
161 .filter(|&j| j != i)
162 .map(|j| self.hessian_approx[[i, j]].abs())
163 .sum();
164
165 let lower_bound = diagonal - off_diagonal_sum;
166 min_est = min_est.min(lower_bound);
167 }
168
169 min_est
170 }
171
172 fn compute_trust_region_ratio(
174 &self,
175 step: &ArrayView1<f64>,
176 gradient: &ArrayView1<f64>,
177 actual_reduction: f64,
178 ) -> f64 {
179 let linear_term = -gradient.dot(step);
181
182 let mut quadratic_term = 0.0;
183 for i in 0..step.len() {
184 for j in 0..step.len() {
185 quadratic_term += step[i] * self.hessian_approx[[i, j]] * step[j];
186 }
187 }
188 quadratic_term *= 0.5;
189
190 let predicted_reduction = linear_term + quadratic_term;
191
192 if predicted_reduction.abs() < 1e-12 {
193 0.0
194 } else {
195 actual_reduction / predicted_reduction
196 }
197 }
198
199 fn update_trust_radius(&mut self, ratio: f64, stepnorm: f64) {
201 const VERY_SUCCESSFUL: f64 = 0.75;
202 const SUCCESSFUL: f64 = 0.25;
203 const EXPANSION_FACTOR: f64 = 2.0;
204 const CONTRACTION_FACTOR: f64 = 0.25;
205 const MAX_TRUST_RADIUS: f64 = 1e6;
206 const MIN_TRUST_RADIUS: f64 = 1e-12;
207
208 if ratio >= VERY_SUCCESSFUL && stepnorm >= 0.8 * self.trust_radius {
209 self.trust_radius = (self.trust_radius * EXPANSION_FACTOR).min(MAX_TRUST_RADIUS);
211 } else if ratio < SUCCESSFUL {
212 self.trust_radius = (self.trust_radius * CONTRACTION_FACTOR).max(MIN_TRUST_RADIUS);
214 }
215 self.success_ratio = utils::ewma_update(self.success_ratio, ratio, 0.1);
219 }
220}
221
222impl<T: StreamingObjective + Clone> StreamingOptimizer for StreamingTrustRegion<T> {
223 fn update(&mut self, datapoint: &StreamingDataPoint) -> Result<()> {
224 let start_time = std::time::Instant::now();
225
226 let current_f = self.objective.evaluate(&self.parameters.view(), datapoint);
228
229 let gradient = self.objective.gradient(&self.parameters.view(), datapoint);
231
232 if self.gradient_count == 0 {
234 self.gradient_accumulator = gradient.clone();
235 } else {
236 let alpha = 1.0 / (self.gradient_count as f64 + 1.0).min(10.0); self.gradient_accumulator =
238 &((1.0 - alpha) * &self.gradient_accumulator) + &(alpha * &gradient);
239 }
240 self.gradient_count += 1;
241
242 let effective_gradient = if self.gradient_count >= 3 {
244 &self.gradient_accumulator
245 } else {
246 &gradient
247 };
248
249 let step = self.solve_trust_region_subproblem(&effective_gradient.view())?;
251 let step_norm = step.mapv(|x| x * x).sum().sqrt();
252
253 let trial_parameters = &self.parameters + &step;
255 let trial_f = self.objective.evaluate(&trial_parameters.view(), datapoint);
256
257 let actual_reduction = current_f - trial_f;
259
260 let ratio = self.compute_trust_region_ratio(
262 &step.view(),
263 &effective_gradient.view(),
264 actual_reduction,
265 );
266
267 const ACCEPTANCE_THRESHOLD: f64 = 0.1;
269 if ratio >= ACCEPTANCE_THRESHOLD {
270 let old_parameters = self.parameters.clone();
272 self.parameters = trial_parameters;
273
274 if self.stats.updates_performed > 0 {
276 let grad_diff = &gradient - &self.gradient_accumulator;
277 self.update_hessian_approximation(&step.view(), &grad_diff.view());
278 }
279
280 self.stats.converged = utils::check_convergence(
282 &old_parameters.view(),
283 &self.parameters.view(),
284 self.config.tolerance,
285 );
286
287 self.stats.updates_performed += 1;
288 self.prev_function_value = trial_f;
289 } else {
290 }
292
293 self.update_trust_radius(ratio, step_norm);
295
296 self.stats.points_processed += 1;
298 self.stats.current_loss = if ratio >= ACCEPTANCE_THRESHOLD {
299 trial_f
300 } else {
301 current_f
302 };
303 self.stats.average_loss =
304 utils::ewma_update(self.stats.average_loss, self.stats.current_loss, 0.01);
305
306 self.stats.processing_time_ms += start_time.elapsed().as_secs_f64() * 1000.0;
307
308 Ok(())
309 }
310
311 fn parameters(&self) -> &Array1<f64> {
312 &self.parameters
313 }
314
315 fn stats(&self) -> &StreamingStats {
316 &self.stats
317 }
318
319 fn reset(&mut self) {
320 let n = self.parameters.len();
321 self.hessian_approx = Array2::eye(n);
322 self.gradient_accumulator = Array1::zeros(n);
323 self.gradient_count = 0;
324 self.success_ratio = 0.5;
325 self.prev_function_value = f64::INFINITY;
326 self.stats = StreamingStats::default();
327 }
328}
329
330#[allow(dead_code)]
332pub fn streaming_trust_region_linear_regression(
333 n_features: usize,
334 config: Option<StreamingConfig>,
335 initial_trust_radius: Option<f64>,
336) -> StreamingTrustRegion<super::LinearRegressionObjective> {
337 let config = config.unwrap_or_default();
338 let trust_radius = initial_trust_radius.unwrap_or(1.0);
339 let initial_params = Array1::zeros(n_features);
340 let objective = super::LinearRegressionObjective;
341
342 StreamingTrustRegion::new(initial_params, objective, config, trust_radius)
343}
344
345#[allow(dead_code)]
347pub fn streaming_trust_region_logistic_regression(
348 n_features: usize,
349 config: Option<StreamingConfig>,
350 initial_trust_radius: Option<f64>,
351) -> StreamingTrustRegion<super::LogisticRegressionObjective> {
352 let config = config.unwrap_or_default();
353 let trust_radius = initial_trust_radius.unwrap_or(1.0);
354 let initial_params = Array1::zeros(n_features);
355 let objective = super::LogisticRegressionObjective;
356
357 StreamingTrustRegion::new(initial_params, objective, config, trust_radius)
358}
359
360#[cfg(test)]
361mod tests {
362 use super::*;
363 use crate::streaming::{LinearRegressionObjective, StreamingDataPoint};
364
365 #[test]
366 fn test_streaming_trust_region_creation() {
367 let params = Array1::from(vec![0.0, 0.0]);
368 let objective = LinearRegressionObjective;
369 let config = StreamingConfig::default();
370 let trust_radius = 1.0;
371
372 let optimizer = StreamingTrustRegion::new(params.clone(), objective, config, trust_radius);
373 assert_eq!(optimizer.parameters(), ¶ms);
374 assert_eq!(optimizer.trust_radius, 1.0);
375 }
376
377 #[test]
378 fn test_trust_region_subproblem_solving() {
379 let params = Array1::from(vec![0.0, 0.0]);
380 let objective = LinearRegressionObjective;
381 let config = StreamingConfig::default();
382 let trust_radius = 1.0;
383
384 let optimizer = StreamingTrustRegion::new(params, objective, config, trust_radius);
385 let gradient = Array1::from(vec![1.0, 2.0]);
386
387 let step = optimizer
388 .solve_trust_region_subproblem(&gradient.view())
389 .unwrap();
390 let step_norm = step.mapv(|x| x * x).sum().sqrt();
391
392 assert!(step_norm <= trust_radius + 1e-10);
394 }
395
396 #[test]
397 fn test_streaming_trust_region_update() {
398 let mut optimizer = streaming_trust_region_linear_regression(2, None, Some(1.0));
399
400 let features = Array1::from(vec![1.0, 2.0]);
401 let target = 3.0;
402 let point = StreamingDataPoint::new(features, target);
403
404 assert!(optimizer.update(&point).is_ok());
405 assert_eq!(optimizer.stats().points_processed, 1);
406 }
407
408 #[test]
409 fn test_hessian_update() {
410 let params = Array1::from(vec![1.0, 1.0]);
411 let objective = LinearRegressionObjective;
412 let mut config = StreamingConfig::default();
413 config.regularization = 1e-6;
414
415 let mut optimizer = StreamingTrustRegion::new(params, objective, config, 1.0);
416
417 let step = Array1::from(vec![0.1, 0.2]);
418 let grad_diff = Array1::from(vec![0.05, 0.1]);
419
420 let original_hessian = optimizer.hessian_approx.clone();
421 optimizer.update_hessian_approximation(&step.view(), &grad_diff.view());
422
423 assert!(&optimizer.hessian_approx != &original_hessian);
425 }
426
427 #[test]
428 fn test_trust_radius_adaptation() {
429 let params = Array1::from(vec![0.0, 0.0]);
430 let objective = LinearRegressionObjective;
431 let config = StreamingConfig::default();
432 let initial_radius = 1.0;
433
434 let mut optimizer = StreamingTrustRegion::new(params, objective, config, initial_radius);
435
436 optimizer.update_trust_radius(0.9, 0.9); assert!(optimizer.trust_radius > initial_radius);
439
440 optimizer.update_trust_radius(0.1, 0.5); assert!(optimizer.trust_radius < initial_radius);
443 }
444
445 #[test]
446 fn test_convergence_detection() {
447 let mut config = StreamingConfig::default();
448 config.tolerance = 1e-2;
449 config.learning_rate = 0.5;
450
451 let mut optimizer = streaming_trust_region_linear_regression(2, Some(config), Some(1.0));
452
453 let point = StreamingDataPoint::new(Array1::from(vec![0.0, 0.0]), 0.0);
455
456 for _ in 0..10 {
458 optimizer.update(&point).unwrap();
459 if optimizer.converged() {
460 break;
461 }
462 }
463
464 assert!(optimizer.converged() || optimizer.stats().updates_performed < 10);
466 }
467}