use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Data, Ix2};
use scirs2_core::numeric::{Float, NumCast, One, Zero};
use scirs2_core::simd_ops::SimdUnifiedOps;
use std::cmp::Ordering;
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};

/// Memory budget and chunking configuration for memory-aware operations.
#[derive(Debug, Clone)]
pub struct MemoryConstraints {
    /// Maximum memory the operation may use, in bytes.
    pub max_memory_bytes: usize,
    /// Preferred chunk size (in elements) for streaming operations.
    pub chunksize: usize,
    /// Whether memory mapping may be used for very large inputs.
    pub use_memory_mapping: bool,
    /// Whether to emit hints that encourage early buffer reclamation.
    pub enable_gc_hints: bool,
}

impl Default for MemoryConstraints {
    fn default() -> Self {
        Self {
            max_memory_bytes: 1_024 * 1_024 * 1_024, // 1 GiB
            chunksize: 64 * 1024,
            use_memory_mapping: true,
            enable_gc_hints: true,
        }
    }
}
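
// Example (hedged sketch, not part of the API surface): a caller tuning for a
// 256 MiB budget can override individual fields via struct-update syntax:
//
//     let constraints = MemoryConstraints {
//         max_memory_bytes: 256 * 1024 * 1024,
//         ..Default::default()
//     };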

/// Tracks memory usage and operation history, and adapts chunk sizes to stay
/// within the configured [`MemoryConstraints`].
pub struct AdaptiveMemoryManager {
    constraints: MemoryConstraints,
    current_usage: Arc<Mutex<usize>>,
    peak_usage: Arc<Mutex<usize>>,
    operation_history: Arc<Mutex<VecDeque<OperationMetrics>>>,
}

/// Bookkeeping recorded by the manager for each completed operation.
#[derive(Debug, Clone)]
pub struct OperationMetrics {
    operation_type: String,
    memory_used: usize,
    processing_time: std::time::Duration,
    chunksize_used: usize,
}

impl AdaptiveMemoryManager {
    pub fn new(constraints: MemoryConstraints) -> Self {
        Self {
            constraints,
            current_usage: Arc::new(Mutex::new(0)),
            peak_usage: Arc::new(Mutex::new(0)),
            operation_history: Arc::new(Mutex::new(VecDeque::with_capacity(100))),
        }
    }

    /// Choose a chunk size (in elements) that fits the remaining memory budget.
    pub fn get_optimal_chunksize(&self, datasize: usize, elementsize: usize) -> usize {
        let current_usage = *self.current_usage.lock().unwrap();
        let available_memory = self
            .constraints
            .max_memory_bytes
            .saturating_sub(current_usage);

        // Use at most 80% of the remaining budget, leaving headroom for temporaries.
        let max_chunk_memory = available_memory * 4 / 5;
        let max_chunk_elements = max_chunk_memory / elementsize;

        let optimalsize = max_chunk_elements
            .min(datasize)
            .min(self.constraints.chunksize);
        // Round down to a power of two for cache-friendly strides; never return
        // zero, since a zero chunk size would panic in callers that use `step_by`.
        (optimalsize.next_power_of_two() / 2).max(1)
    }

    pub fn record_operation(&self, metrics: OperationMetrics) {
        let mut history = self.operation_history.lock().unwrap();

        // Keep a bounded window of the most recent operations.
        if history.len() >= 100 {
            history.pop_front();
        }

        history.push_back(metrics.clone());

        let mut peak = self.peak_usage.lock().unwrap();
        *peak = (*peak).max(metrics.memory_used);
    }

    pub fn get_statistics(&self) -> MemoryStatistics {
        let current_usage = *self.current_usage.lock().unwrap();
        let peak_usage = *self.peak_usage.lock().unwrap();
        let history = self.operation_history.lock().unwrap();

        let avg_memory_per_op = if !history.is_empty() {
            history.iter().map(|m| m.memory_used).sum::<usize>() / history.len()
        } else {
            0
        };

        MemoryStatistics {
            current_usage,
            peak_usage,
            avg_memory_per_operation: avg_memory_per_op,
            operations_completed: history.len(),
            memory_efficiency: if peak_usage > 0 {
                (avg_memory_per_op as f64 / peak_usage as f64) * 100.0
            } else {
                100.0
            },
        }
    }
}
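
// Illustrative workflow (hedged; `process_block` is hypothetical): ask the
// manager for a chunk size, do the work, then record what actually happened
// so later sizing decisions can adapt.
//
//     let manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
//     let chunk = manager.get_optimal_chunksize(n_elements, std::mem::size_of::<f64>());
//     for block in data.chunks(chunk) {
//         process_block(block);
//     }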

/// Summary statistics reported by [`AdaptiveMemoryManager::get_statistics`].
#[derive(Debug, Clone)]
pub struct MemoryStatistics {
    pub current_usage: usize,
    pub peak_usage: usize,
    pub avg_memory_per_operation: usize,
    pub operations_completed: usize,
    /// Average per-operation usage as a percentage of peak usage.
    pub memory_efficiency: f64,
}

/// Memory-aware correlation matrix: picks the standard or the blocked
/// algorithm based on the estimated footprint versus the manager's budget.
#[allow(dead_code)]
pub fn corrcoef_memory_aware<F>(
    data: &ArrayView2<F>,
    method: &str,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + Copy
        + Send
        + Sync
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    checkarray_finite_2d(data, "data")?;

    let (n_obs, n_vars) = data.dim();
    let elementsize = std::mem::size_of::<F>();

    // Estimate the footprint: the output matrix plus per-column temporaries.
    let matrix_memory = n_vars * n_vars * elementsize;
    let temp_memory = n_obs * elementsize * 2;
    let total_estimated = matrix_memory + temp_memory;

    let corr_matrix = if total_estimated <= manager.constraints.max_memory_bytes {
        compute_correlation_matrix_standard(data, method)?
    } else {
        let blocksize = manager.get_optimal_chunksize(n_vars, elementsize * n_vars);
        compute_correlation_matrix_blocked(data, method, blocksize)?
    };

    let metrics = OperationMetrics {
        operation_type: format!("corrcoef_memory_aware_{}", method),
        memory_used: total_estimated,
        processing_time: start_time.elapsed(),
        chunksize_used: n_vars,
    };
    manager.record_operation(metrics);

    Ok(corr_matrix)
}
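
// Minimal call sketch (hedged; assumes a 2-D f64 array `data` in scope):
//
//     let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
//     let corr = corrcoef_memory_aware(&data.view(), "pearson", &mut manager)?;
//     println!("peak: {} bytes", manager.get_statistics().peak_usage);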

/// Cache-oblivious matrix multiplication: recurse on the largest dimension
/// until every block fits under `threshold`, then multiply directly.
#[allow(dead_code)]
pub fn cache_oblivious_matrix_mult<F>(
    a: &ArrayView2<F>,
    b: &ArrayView2<F>,
    threshold: usize,
) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Display,
{
    let (m, n) = a.dim();
    let (n2, p) = b.dim();

    if n != n2 {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions don't match for multiplication".to_string(),
        ));
    }

    let mut result = Array2::<F>::zeros((m, p));

    if m <= threshold && n <= threshold && p <= threshold {
        // Base case: direct triple-loop multiplication.
        for i in 0..m {
            for j in 0..p {
                let mut sum = F::zero();
                for k in 0..n {
                    sum = sum + a[[i, k]] * b[[k, j]];
                }
                result[[i, j]] = sum;
            }
        }
    } else if m > threshold {
        // Split the rows of `a` and stack the two half-results.
        let mid_m = m / 2;
        let a_top = a.slice(scirs2_core::ndarray::s![..mid_m, ..]);
        let a_bottom = a.slice(scirs2_core::ndarray::s![mid_m.., ..]);

        let result_top = cache_oblivious_matrix_mult(&a_top, b, threshold)?;
        let result_bottom = cache_oblivious_matrix_mult(&a_bottom, b, threshold)?;

        result
            .slice_mut(scirs2_core::ndarray::s![..mid_m, ..])
            .assign(&result_top);
        result
            .slice_mut(scirs2_core::ndarray::s![mid_m.., ..])
            .assign(&result_bottom);
    } else if p > threshold {
        // Split the columns of `b` and place the two half-results side by side.
        let mid_p = p / 2;
        let b_left = b.slice(scirs2_core::ndarray::s![.., ..mid_p]);
        let b_right = b.slice(scirs2_core::ndarray::s![.., mid_p..]);

        let result_left = cache_oblivious_matrix_mult(a, &b_left, threshold)?;
        let result_right = cache_oblivious_matrix_mult(a, &b_right, threshold)?;

        result
            .slice_mut(scirs2_core::ndarray::s![.., ..mid_p])
            .assign(&result_left);
        result
            .slice_mut(scirs2_core::ndarray::s![.., mid_p..])
            .assign(&result_right);
    } else {
        // Split the shared dimension and sum the two partial products.
        let mid_n = n / 2;
        let a_left = a.slice(scirs2_core::ndarray::s![.., ..mid_n]);
        let a_right = a.slice(scirs2_core::ndarray::s![.., mid_n..]);
        let b_top = b.slice(scirs2_core::ndarray::s![..mid_n, ..]);
        let b_bottom = b.slice(scirs2_core::ndarray::s![mid_n.., ..]);

        let partial1 = cache_oblivious_matrix_mult(&a_left, &b_top, threshold)?;
        let partial2 = cache_oblivious_matrix_mult(&a_right, &b_bottom, threshold)?;

        for i in 0..m {
            for j in 0..p {
                result[[i, j]] = partial1[[i, j]] + partial2[[i, j]];
            }
        }
    }

    Ok(result)
}

/// Single-pass covariance matrix over an iterator of row chunks.
/// Accumulates the sufficient statistics Σx and Σxxᵀ; divides by n
/// (population form).
#[allow(dead_code)]
pub fn streaming_covariance_matrix<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut n_vars = 0;
    let mut total_obs = 0;
    let mut sum_x = Array1::<F>::zeros(0);
    let mut sum_x2 = Array2::<F>::zeros((0, 0));
    let mut initialized = false;

    for chunk in data_chunks {
        let (chunk_obs, chunk_vars) = chunk.dim();

        if !initialized {
            n_vars = chunk_vars;
            sum_x = Array1::<F>::zeros(n_vars);
            sum_x2 = Array2::<F>::zeros((n_vars, n_vars));
            initialized = true;
        } else if chunk_vars != n_vars {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have the same number of variables".to_string(),
            ));
        }

        total_obs += chunk_obs;

        for i in 0..chunk_obs {
            let row = chunk.row(i);

            for j in 0..n_vars {
                sum_x[j] = sum_x[j] + row[j];
            }

            // Accumulate the upper triangle of Σxxᵀ and mirror it.
            for j in 0..n_vars {
                for k in j..n_vars {
                    let product = row[j] * row[k];
                    sum_x2[[j, k]] = sum_x2[[j, k]] + product;
                    if j != k {
                        sum_x2[[k, j]] = sum_x2[[k, j]] + product;
                    }
                }
            }
        }
    }

    if total_obs == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // cov(i, j) = E[x_i x_j] - E[x_i] E[x_j]
    let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
    let n_f = F::from(total_obs).unwrap();

    for i in 0..n_vars {
        for j in 0..n_vars {
            let mean_i = sum_x[i] / n_f;
            let mean_j = sum_x[j] / n_f;
            let cov = (sum_x2[[i, j]] / n_f) - (mean_i * mean_j);
            cov_matrix[[i, j]] = cov;
        }
    }

    let memory_used = (n_vars * n_vars + n_vars) * std::mem::size_of::<F>();
    let metrics = OperationMetrics {
        operation_type: "streaming_covariance_matrix".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: n_vars,
    };
    manager.record_operation(metrics);

    Ok(cov_matrix)
}
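
// Illustrative use (hedged): `axis_chunks_iter` yields row blocks of an
// in-memory matrix, but any iterator of `ArrayView2` chunks works, e.g. one
// backed by a memory-mapped file.
//
//     use scirs2_core::ndarray::Axis;
//     let chunks = data.axis_chunks_iter(Axis(0), 1024);
//     let cov = streaming_covariance_matrix(chunks, &mut manager)?;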

/// PCA that chooses between an in-memory and an incremental algorithm based
/// on the estimated size of the centered data matrix.
#[allow(dead_code)]
pub fn pca_memory_efficient<F>(
    data: &ArrayView2<F>,
    n_components: Option<usize>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let (n_obs, n_vars) = data.dim();
    let n_components = n_components.unwrap_or(n_vars.min(n_obs));

    // Column means, needed for centering in either code path.
    let mut means = Array1::<F>::zeros(n_vars);
    for i in 0..n_vars {
        let column = data.column(i);
        means[i] = column.iter().fold(F::zero(), |acc, &x| acc + x) / F::from(n_obs).unwrap();
    }

    let centereddata_memory = n_obs * n_vars * std::mem::size_of::<F>();

    if centereddata_memory <= manager.constraints.max_memory_bytes / 2 {
        // Fast path: materialize the centered data.
        let mut centereddata = Array2::<F>::zeros((n_obs, n_vars));
        for i in 0..n_obs {
            for j in 0..n_vars {
                centereddata[[i, j]] = data[[i, j]] - means[j];
            }
        }

        let cov_matrix = compute_covariance_from_centered(&centereddata.view())?;

        let (eigenvectors, eigenvalues) =
            compute_eigendecomposition(&cov_matrix.view(), n_components)?;

        let transformed = matrix_multiply(&centereddata.view(), &eigenvectors.view())?;

        let result = PCAResult {
            components: eigenvectors,
            explained_variance: eigenvalues,
            transformeddata: transformed,
            mean: means,
        };

        let metrics = OperationMetrics {
            operation_type: "pca_memory_efficient".to_string(),
            memory_used: centereddata_memory,
            processing_time: start_time.elapsed(),
            chunksize_used: n_vars,
        };
        manager.record_operation(metrics);

        Ok(result)
    } else {
        // Slow path: process the data in batches without materializing it.
        incremental_pca(data, n_components, &means, manager)
    }
}
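
// Illustrative call (hedged; assumes an f64 matrix `data` in scope):
//
//     let pca = pca_memory_efficient(&data.view(), Some(2), &mut manager)?;
//     // pca.components is n_vars x 2; pca.transformeddata is n_obs x 2.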

/// Output of the PCA routines.
#[derive(Debug, Clone)]
pub struct PCAResult<F> {
    /// Principal axes, one column per component (n_vars x n_components).
    pub components: Array2<F>,
    /// Variance explained by each component.
    pub explained_variance: Array1<F>,
    /// Input data projected onto the components (empty for streaming PCA).
    pub transformeddata: Array2<F>,
    /// Per-variable means used for centering.
    pub mean: Array1<F>,
}

/// Streaming PCA: maintains a running mean and covariance over chunks, then
/// eigendecomposes the final covariance estimate.
#[allow(dead_code)]
pub fn streaming_pca_enhanced<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
    n_components: usize,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut n_samples_ = 0;
    let mut n_features = 0;
    let mut running_mean = Array1::<F>::zeros(0);
    let mut running_cov = Array2::<F>::zeros((0, 0));
    let mut initialized = false;

    for chunk in data_chunks {
        let (chunk_samples, chunk_features) = chunk.dim();

        if !initialized {
            n_features = chunk_features;
            running_mean = Array1::<F>::zeros(n_features);
            running_cov = Array2::<F>::zeros((n_features, n_features));
            initialized = true;
        } else if chunk_features != n_features {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have same number of features".to_string(),
            ));
        }

        for i in 0..chunk_samples {
            n_samples_ += 1;
            let row = chunk.row(i);

            // Welford-style running mean update.
            let n_f = F::from(n_samples_).unwrap();
            for j in 0..n_features {
                let delta = row[j] - running_mean[j];
                running_mean[j] = running_mean[j] + delta / n_f;
            }

            // Running covariance update (approximate: uses the already-updated means).
            if n_samples_ > 1 {
                for j in 0..n_features {
                    for k in j..n_features {
                        let prod = (row[j] - running_mean[j]) * (row[k] - running_mean[k]);
                        running_cov[[j, k]] =
                            running_cov[[j, k]] * F::from(n_samples_ - 1).unwrap() / n_f
                                + prod / n_f;
                        if j != k {
                            running_cov[[k, j]] = running_cov[[j, k]];
                        }
                    }
                }
            }
        }
    }

    if n_samples_ == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    let (components, explained_variance) =
        compute_eigendecomposition(&running_cov.view(), n_components)?;

    let memory_used =
        n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_pca_enhanced".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(PCAResult {
        components,
        explained_variance,
        // The stream cannot be replayed, so no projected data is returned.
        transformeddata: Array2::zeros((0, 0)),
        mean: running_mean,
    })
}
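
// `transformeddata` is empty after a streaming fit because the stream cannot
// be replayed. A second pass can compute scores from the fitted model
// (hedged sketch; `row` is one observation as an `Array1<f64>`):
//
//     let centered = &row - &result.mean;
//     let scores = centered.dot(&result.components); // length n_components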

/// Adaptive streaming histogram: tracks min/max exactly, keeps a bounded
/// reservoir sample, and sizes bins with the Freedman-Diaconis rule.
#[allow(dead_code)]
pub fn streaming_histogram_adaptive<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<(Array1<F>, Array1<usize>)>
where
    F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut min_val = F::infinity();
    let mut max_val = F::neg_infinity();
    let mut total_count = 0;
    let mut values = Vec::new();

    for chunk in data_chunks {
        for &value in chunk.iter() {
            if value < min_val {
                min_val = value;
            }
            if value > max_val {
                max_val = value;
            }
            total_count += 1;

            // Reservoir sampling: keep a uniform sample of bounded size.
            if values.len() < manager.constraints.chunksize {
                values.push(value);
            } else {
                let j = {
                    use scirs2_core::random::Rng;
                    let mut rng = scirs2_core::random::thread_rng();
                    rng.gen_range(0..total_count)
                };
                if j < values.len() {
                    values[j] = value;
                }
            }
        }
    }

    if total_count == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Freedman-Diaconis bin width: h = 2 * IQR * n^(-1/3).
    values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
    let q75_idx = (values.len() as f64 * 0.75) as usize;
    let q25_idx = (values.len() as f64 * 0.25) as usize;
    let iqr = values[q75_idx] - values[q25_idx];
    let h = F::from(2.0).unwrap() * iqr
        / F::from(total_count as f64)
            .unwrap()
            .powf(F::from(1.0 / 3.0).unwrap());
    let n_bins = if h > F::zero() {
        ((max_val - min_val) / h)
            .to_usize()
            .unwrap_or(50)
            .max(10)
            .min(1000)
    } else {
        // Degenerate IQR (e.g., constant data): fall back to a fixed bin count.
        50
    };

    let bin_width = (max_val - min_val) / F::from(n_bins).unwrap();
    let mut bin_edges = Array1::<F>::zeros(n_bins + 1);
    let mut counts = Array1::<usize>::zeros(n_bins);

    for i in 0..=n_bins {
        bin_edges[i] = min_val + F::from(i).unwrap() * bin_width;
    }

    // Histogram the reservoir sample (not the full stream).
    for &value in values.iter() {
        if value >= min_val && value <= max_val {
            let bin_idx = ((value - min_val) / bin_width)
                .to_usize()
                .unwrap_or(0)
                .min(n_bins - 1);
            counts[bin_idx] += 1;
        }
    }

    let memory_used = n_bins * (std::mem::size_of::<F>() + std::mem::size_of::<usize>());
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_histogram_adaptive".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok((bin_edges, counts))
}
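
// Minimal call sketch (hedged): the returned edges have length n_bins + 1,
// and the counts reflect the bounded reservoir sample, not the full stream.
//
//     let (edges, counts) = streaming_histogram_adaptive(chunks, &mut manager)?;
//     assert_eq!(edges.len(), counts.len() + 1);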

/// Streaming quantiles via the P-squared algorithm: O(1) memory per quantile,
/// with no buffering of the data stream.
#[allow(dead_code)]
pub fn streaming_quantiles_p2<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
    quantiles: &[f64],
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut p2_estimators: Vec<P2Estimator<F>> =
        quantiles.iter().map(|&q| P2Estimator::new(q)).collect();

    let mut total_count = 0;

    for chunk in data_chunks {
        for &value in chunk.iter() {
            total_count += 1;
            for estimator in &mut p2_estimators {
                estimator.update(value);
            }
        }
    }

    if total_count == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    let results: Vec<F> = p2_estimators.iter().map(|est| est.quantile()).collect();

    let memory_used = quantiles.len() * std::mem::size_of::<P2Estimator<F>>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_quantiles_p2".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(Array1::from_vec(results))
}
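
// Minimal call sketch (hedged): one P-squared estimator per requested
// quantile, so memory stays constant regardless of stream length.
//
//     let qs = streaming_quantiles_p2(chunks, &[0.25, 0.5, 0.75], &mut manager)?;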

// P-squared (P²) streaming quantile estimator, after Jain & Chlamtac (1985):
// five markers track the minimum, the maximum, the target quantile, and two
// intermediate quantiles; marker heights are adjusted by interpolation.
#[derive(Debug, Clone)]
struct P2Estimator<F> {
    quantile: f64,
    /// Marker heights (estimated quantile values).
    markers: [F; 5],
    /// Actual marker positions (1-based observation ranks).
    positions: [f64; 5],
    /// Ideal marker positions for the target quantile.
    desired_positions: [f64; 5],
    /// Per-observation increments applied to the desired positions.
    increment: [f64; 5],
    count: usize,
}

impl<F> P2Estimator<F>
where
    F: Float + NumCast + Copy + PartialOrd + std::fmt::Display,
{
    fn new(quantile: f64) -> Self {
        Self {
            quantile,
            markers: [F::zero(); 5],
            positions: [1.0, 2.0, 3.0, 4.0, 5.0],
            desired_positions: [
                1.0,
                1.0 + 2.0 * quantile,
                1.0 + 4.0 * quantile,
                3.0 + 2.0 * quantile,
                5.0,
            ],
            increment: [0.0, quantile / 2.0, quantile, (1.0 + quantile) / 2.0, 1.0],
            count: 0,
        }
    }

    fn update(&mut self, value: F) {
        self.count += 1;

        // Initialization phase: collect and sort the first five observations.
        if self.count <= 5 {
            self.markers[self.count - 1] = value;
            if self.count == 5 {
                self.markers
                    .sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            }
            return;
        }

        // Find the marker cell k that the new value falls into.
        let mut k = 0;
        for i in 0..4 {
            if value >= self.markers[i] {
                k = i + 1;
            }
        }

        if k == 0 {
            self.markers[0] = value;
            k = 1;
        } else if k == 5 {
            self.markers[4] = value;
            k = 4;
        }

        // Shift the positions of all markers above the insertion point.
        for i in k..5 {
            self.positions[i] += 1.0;
        }

        for i in 0..5 {
            self.desired_positions[i] += self.increment[i];
        }

        // Adjust the interior markers toward their desired positions.
        for i in 1..4 {
            let d = self.desired_positions[i] - self.positions[i];
            if (d >= 1.0 && self.positions[i + 1] - self.positions[i] > 1.0)
                || (d <= -1.0 && self.positions[i - 1] - self.positions[i] < -1.0)
            {
                let d_sign = if d >= 0.0 { 1.0 } else { -1.0 };
                let new_height = self.parabolic_prediction(i, d_sign);

                // Use the parabolic estimate only if it keeps the markers ordered.
                if self.markers[i - 1] < new_height && new_height < self.markers[i + 1] {
                    self.markers[i] = new_height;
                } else {
                    self.markers[i] = self.linear_prediction(i, d_sign);
                }
                self.positions[i] += d_sign;
            }
        }
    }

    fn parabolic_prediction(&self, i: usize, d: f64) -> F {
        let qi = self.markers[i];
        let qi_prev = self.markers[i - 1];
        let qi_next = self.markers[i + 1];
        let ni = self.positions[i];
        let ni_prev = self.positions[i - 1];
        let ni_next = self.positions[i + 1];

        let d_f = F::from(d).unwrap();
        let ni_f = F::from(ni).unwrap();
        let ni_prev_f = F::from(ni_prev).unwrap();
        let ni_next_f = F::from(ni_next).unwrap();

        let a = d_f / (ni_next_f - ni_prev_f);
        let b1 = (ni_f - ni_prev_f + d_f) * (qi_next - qi) / (ni_next_f - ni_f);
        let b2 = (ni_next_f - ni_f - d_f) * (qi - qi_prev) / (ni_f - ni_prev_f);

        qi + a * (b1 + b2)
    }

    fn linear_prediction(&self, i: usize, d: f64) -> F {
        if d > 0.0 {
            let qi_next = self.markers[i + 1];
            let qi = self.markers[i];
            let ni_next = self.positions[i + 1];
            let ni = self.positions[i];
            qi + (qi_next - qi) * F::from(d / (ni_next - ni)).unwrap()
        } else {
            let qi = self.markers[i];
            let qi_prev = self.markers[i - 1];
            let ni = self.positions[i];
            let ni_prev = self.positions[i - 1];
            qi + (qi_prev - qi) * F::from(-d / (ni - ni_prev)).unwrap()
        }
    }

    fn quantile(&self) -> F {
        if self.count < 5 {
            // Not enough observations for the marker machinery; fall back to
            // the empirical quantile of what has been seen so far.
            let mut sorted = self.markers[..self.count.min(5)].to_vec();
            if sorted.is_empty() {
                return F::zero();
            }
            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            let idx = (self.quantile * (sorted.len() - 1) as f64) as usize;
            sorted[idx]
        } else {
            // The middle marker tracks the target quantile.
            self.markers[2]
        }
    }
}

/// Streaming ridge regression: accumulates XᵀX and Xᵀy over chunks, adds an
/// L2 penalty to the diagonal, and solves the normal equations once.
#[allow(dead_code)]
pub fn streaming_regression_enhanced<'a, F>(
    data_chunks: impl Iterator<Item = (ArrayView2<'a, F>, ArrayView1<'a, F>)>,
    regularization: F,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut xtx = Array2::<F>::zeros((0, 0));
    let mut xty = Array1::<F>::zeros(0);
    let mut n_samples_ = 0;
    let mut n_features = 0;
    let mut initialized = false;

    for (x_chunk, y_chunk) in data_chunks {
        let (chunk_samples, chunk_features) = x_chunk.dim();

        if !initialized {
            n_features = chunk_features;
            xtx = Array2::<F>::zeros((n_features, n_features));
            xty = Array1::<F>::zeros(n_features);
            initialized = true;
        } else if chunk_features != n_features {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have same number of features".to_string(),
            ));
        }

        if y_chunk.len() != chunk_samples {
            return Err(StatsError::DimensionMismatch(
                "X and y must have same number of samples".to_string(),
            ));
        }

        n_samples_ += chunk_samples;

        // Accumulate the upper triangle of XᵀX and mirror it.
        for i in 0..n_features {
            for j in i..n_features {
                let mut sum = F::zero();
                for k in 0..chunk_samples {
                    sum = sum + x_chunk[[k, i]] * x_chunk[[k, j]];
                }
                xtx[[i, j]] = xtx[[i, j]] + sum;
                if i != j {
                    xtx[[j, i]] = xtx[[i, j]];
                }
            }
        }

        // Accumulate Xᵀy.
        for i in 0..n_features {
            let mut sum = F::zero();
            for k in 0..chunk_samples {
                sum = sum + x_chunk[[k, i]] * y_chunk[k];
            }
            xty[i] = xty[i] + sum;
        }
    }

    if n_samples_ == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Ridge penalty on the diagonal keeps the system well-conditioned.
    for i in 0..n_features {
        xtx[[i, i]] = xtx[[i, i]] + regularization;
    }

    let coefficients = solve_linear_system(&xtx.view(), &xty.view())?;

    let memory_used =
        n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_regression_enhanced".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(coefficients)
}
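
// Minimal call sketch (hedged; chunks pair a design-matrix block with its
// targets): a small positive `regularization` acts as a ridge penalty.
//
//     let beta = streaming_regression_enhanced(xy_chunks, 1e-6, &mut manager)?;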

/// Solve a dense linear system via Gauss-Jordan elimination with partial pivoting.
#[allow(dead_code)]
fn solve_linear_system<F>(a: &ArrayView2<F>, b: &ArrayView1<F>) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
{
    let n = a.nrows();
    if a.ncols() != n || b.len() != n {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions incompatible".to_string(),
        ));
    }

    // Build the augmented matrix [A | b].
    let mut aug = Array2::<F>::zeros((n, n + 1));

    for i in 0..n {
        for j in 0..n {
            aug[[i, j]] = a[[i, j]];
        }
        aug[[i, n]] = b[i];
    }

    for i in 0..n {
        // Partial pivoting: swap in the row with the largest pivot.
        let mut max_row = i;
        for k in (i + 1)..n {
            if aug[[k, i]].abs() > aug[[max_row, i]].abs() {
                max_row = k;
            }
        }

        if max_row != i {
            for j in 0..=n {
                let temp = aug[[i, j]];
                aug[[i, j]] = aug[[max_row, j]];
                aug[[max_row, j]] = temp;
            }
        }

        let pivot = aug[[i, i]];
        if pivot.abs() < F::from(1e-12).unwrap() {
            return Err(StatsError::ComputationError(
                "Matrix is singular".to_string(),
            ));
        }

        // Normalize the pivot row.
        for j in i..=n {
            aug[[i, j]] = aug[[i, j]] / pivot;
        }

        // Eliminate the pivot column from every other row.
        for k in 0..n {
            if k != i {
                let factor = aug[[k, i]];
                for j in i..=n {
                    aug[[k, j]] = aug[[k, j]] - factor * aug[[i, j]];
                }
            }
        }
    }

    // After full elimination the last column holds the solution.
    let mut solution = Array1::<F>::zeros(n);
    for i in 0..n {
        solution[i] = aug[[i, n]];
    }

    Ok(solution)
}

/// Delegate to the crate-level correlation implementation.
#[allow(dead_code)]
fn compute_correlation_matrix_standard<F>(
    data: &ArrayView2<F>,
    method: &str,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + Zero
        + One
        + Copy
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display
        + Send
        + Sync
        + scirs2_core::simd_ops::SimdUnifiedOps,
{
    crate::corrcoef(data, method)
}

/// Correlation matrix computed block-by-block to bound the working set.
#[allow(dead_code)]
fn compute_correlation_matrix_blocked<F>(
    data: &ArrayView2<F>,
    method: &str,
    blocksize: usize,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + Zero
        + One
        + Copy
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display
        + scirs2_core::simd_ops::SimdUnifiedOps,
{
    let (_, n_vars) = data.dim();
    let mut corr_matrix = Array2::<F>::zeros((n_vars, n_vars));

    for i in 0..n_vars {
        corr_matrix[[i, i]] = F::one();
    }

    // Walk the upper triangle in blocks; each entry is filled symmetrically.
    for i_block in (0..n_vars).step_by(blocksize) {
        let i_end = (i_block + blocksize).min(n_vars);

        for j_block in (i_block..n_vars).step_by(blocksize) {
            let j_end = (j_block + blocksize).min(n_vars);

            for i in i_block..i_end {
                for j in j_block.max(i + 1)..j_end {
                    let col_i = data.column(i);
                    let col_j = data.column(j);

                    let corr = match method {
                        "pearson" => crate::pearson_r(&col_i, &col_j)?,
                        "spearman" => crate::spearman_r(&col_i, &col_j)?,
                        "kendall" => crate::kendall_tau(&col_i, &col_j, "b")?,
                        _ => {
                            return Err(StatsError::InvalidArgument(format!(
                                "Unknown method: {}",
                                method
                            )))
                        }
                    };

                    corr_matrix[[i, j]] = corr;
                    corr_matrix[[j, i]] = corr;
                }
            }
        }
    }

    Ok(corr_matrix)
}

/// Sample covariance (divides by n - 1) of already-centered data.
#[allow(dead_code)]
fn compute_covariance_from_centered<F>(data: &ArrayView2<F>) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + Copy + std::fmt::Display,
{
    let (n_obs, n_vars) = data.dim();
    let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
    let n_f = F::from(n_obs - 1).unwrap();

    for i in 0..n_vars {
        for j in i..n_vars {
            let mut cov = F::zero();
            for k in 0..n_obs {
                cov = cov + data[[k, i]] * data[[k, j]];
            }
            cov = cov / n_f;
            cov_matrix[[i, j]] = cov;
            cov_matrix[[j, i]] = cov;
        }
    }

    Ok(cov_matrix)
}

/// Leading eigenpairs via power iteration with Gram-Schmidt deflation.
#[allow(dead_code)]
fn compute_eigendecomposition<F>(
    matrix: &ArrayView2<F>,
    n_components: usize,
) -> StatsResult<(Array2<F>, Array1<F>)>
where
    F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
{
    let n = matrix.dim().0;
    let n_components = n_components.min(n);

    let mut eigenvalues = Array1::<F>::zeros(n_components);
    let mut eigenvectors = Array2::<F>::zeros((n, n_components));

    for k in 0..n_components {
        let mut v = Array1::<F>::ones(n);

        for _ in 0..100 {
            // Matrix-vector product: new_v = matrix * v.
            let mut new_v = Array1::<F>::zeros(n);

            for i in 0..n {
                let mut sum = F::zero();
                for j in 0..n {
                    sum = sum + matrix[[i, j]] * v[j];
                }
                new_v[i] = sum;
            }

            // Deflate: remove components along previously found eigenvectors.
            for prev_k in 0..k {
                let mut dot_product = F::zero();
                for i in 0..n {
                    dot_product = dot_product + new_v[i] * eigenvectors[[i, prev_k]];
                }

                for i in 0..n {
                    new_v[i] = new_v[i] - dot_product * eigenvectors[[i, prev_k]];
                }
            }

            // Normalize.
            let mut norm = F::zero();
            for i in 0..n {
                norm = norm + new_v[i] * new_v[i];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for i in 0..n {
                    new_v[i] = new_v[i] / norm;
                }
            }

            // Stop when the iterate has stabilized.
            let mut converged = true;
            for i in 0..n {
                if (new_v[i] - v[i]).abs() > F::from(1e-6).unwrap() {
                    converged = false;
                    break;
                }
            }

            v = new_v;

            if converged {
                break;
            }
        }

        // Rayleigh quotient: eigenvalue = vᵀ * matrix * v (v has unit length).
        let mut eigenvalue = F::zero();
        for i in 0..n {
            let mut sum = F::zero();
            for j in 0..n {
                sum = sum + matrix[[i, j]] * v[j];
            }
            eigenvalue = eigenvalue + v[i] * sum;
        }

        eigenvalues[k] = eigenvalue;
        for i in 0..n {
            eigenvectors[[i, k]] = v[i];
        }
    }

    Ok((eigenvectors, eigenvalues))
}

/// Naive dense matrix multiplication (used on the in-memory PCA path).
#[allow(dead_code)]
fn matrix_multiply<F>(a: &ArrayView2<F>, b: &ArrayView2<F>) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + Copy + std::fmt::Display,
{
    let (m, n) = a.dim();
    let (n2, p) = b.dim();

    if n != n2 {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions don't match".to_string(),
        ));
    }

    let mut result = Array2::<F>::zeros((m, p));

    for i in 0..m {
        for j in 0..p {
            let mut sum = F::zero();
            for k in 0..n {
                sum = sum + a[[i, k]] * b[[k, j]];
            }
            result[[i, j]] = sum;
        }
    }

    Ok(result)
}

/// Batched PCA for data too large to center in memory: one power-iteration
/// style pass per batch, followed by a final projection pass.
#[allow(dead_code)]
fn incremental_pca<F>(
    data: &ArrayView2<F>,
    n_components: usize,
    means: &Array1<F>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
{
    let (n_obs, n_vars) = data.dim();
    let n_components = n_components.min(n_vars);

    let batchsize = manager.get_optimal_chunksize(n_obs, std::mem::size_of::<F>() * n_vars);

    // Initialize the components to distinct unit basis vectors.
    let mut components = Array2::<F>::zeros((n_vars, n_components));
    for i in 0..n_components {
        components[[i % n_vars, i]] = F::one();
    }

    let mut explained_variance = Array1::<F>::zeros(n_components);
    let mut _n_samples_seen = 0;

    for batch_start in (0..n_obs).step_by(batchsize) {
        let batch_end = (batch_start + batchsize).min(n_obs);
        let batch = data.slice(scirs2_core::ndarray::s![batch_start..batch_end, ..]);

        // Center the batch using the precomputed column means.
        let mut centered_batch = Array2::<F>::zeros(batch.dim());
        for i in 0..batch.nrows() {
            for j in 0..batch.ncols() {
                centered_batch[[i, j]] = batch[[i, j]] - means[j];
            }
        }

        for k in 0..n_components {
            let component = components.column(k).to_owned();

            // Project the batch onto the current component estimate.
            let mut projections = Array1::<F>::zeros(batch.nrows());
            for i in 0..batch.nrows() {
                let mut projection = F::zero();
                for j in 0..n_vars {
                    projection = projection + centered_batch[[i, j]] * component[j];
                }
                projections[i] = projection;
            }

            // Re-estimate the component from the projections (one power step).
            let mut new_component = Array1::<F>::zeros(n_vars);
            for j in 0..n_vars {
                let mut sum = F::zero();
                for i in 0..batch.nrows() {
                    sum = sum + centered_batch[[i, j]] * projections[i];
                }
                new_component[j] = sum;
            }

            // Normalize the updated component.
            let mut norm = F::zero();
            for j in 0..n_vars {
                norm = norm + new_component[j] * new_component[j];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for j in 0..n_vars {
                    components[[j, k]] = new_component[j] / norm;
                }

                let variance = projections
                    .iter()
                    .map(|&x| x * x)
                    .fold(F::zero(), |acc, x| acc + x);
                explained_variance[k] = variance / F::from(batch.nrows()).unwrap();
            }
        }

        _n_samples_seen += batch.nrows();
    }

    // Final pass: project the full dataset onto the fitted components.
    let mut transformeddata = Array2::<F>::zeros((n_obs, n_components));
    for i in 0..n_obs {
        for k in 0..n_components {
            let mut projection = F::zero();
            for j in 0..n_vars {
                let centered_val = data[[i, j]] - means[j];
                projection = projection + centered_val * components[[j, k]];
            }
            transformeddata[[i, k]] = projection;
        }
    }

    Ok(PCAResult {
        components,
        explained_variance,
        transformeddata,
        mean: means.clone(),
    })
}

/// Return an error if any element of `arr` is NaN or infinite.
#[allow(dead_code)]
fn checkarray_finite_2d<F, D>(arr: &ArrayBase<D, Ix2>, name: &str) -> StatsResult<()>
where
    F: Float,
    D: Data<Elem = F>,
{
    for &val in arr.iter() {
        if !val.is_finite() {
            return Err(StatsError::InvalidArgument(format!(
                "{} contains non-finite values",
                name
            )));
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_adaptive_memory_manager() {
        let constraints = MemoryConstraints::default();
        let manager = AdaptiveMemoryManager::new(constraints);

        let chunksize = manager.get_optimal_chunksize(1000, 8);
        assert!(chunksize > 0);
        assert!(chunksize <= 1000);

        let metrics = OperationMetrics {
            operation_type: "test".to_string(),
            memory_used: 1024,
            processing_time: std::time::Duration::from_millis(10),
            chunksize_used: chunksize,
        };

        manager.record_operation(metrics);
        let stats = manager.get_statistics();
        assert_eq!(stats.operations_completed, 1);
    }

    #[test]
    fn test_cache_oblivious_matrix_mult() {
        let a = array![[1.0, 2.0], [3.0, 4.0]];
        let b = array![[5.0, 6.0], [7.0, 8.0]];

        let result = cache_oblivious_matrix_mult(&a.view(), &b.view(), 2).unwrap();

        assert!((result[[0, 0]] - 19.0).abs() < 1e-10);
        assert!((result[[0, 1]] - 22.0).abs() < 1e-10);
        assert!((result[[1, 0]] - 43.0).abs() < 1e-10);
        assert!((result[[1, 1]] - 50.0).abs() < 1e-10);
    }
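
    // Additional sanity checks (sketches) for helpers not exercised above.

    #[test]
    fn test_streaming_covariance_matrix() {
        let data = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let chunks = vec![
            data.slice(scirs2_core::ndarray::s![..2, ..]),
            data.slice(scirs2_core::ndarray::s![2.., ..]),
        ];
        let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
        let cov = streaming_covariance_matrix(chunks.into_iter(), &mut manager).unwrap();
        // Population covariance of this ramp is 8/3 in every entry.
        for i in 0..2 {
            for j in 0..2 {
                assert!((cov[[i, j]] - 8.0 / 3.0).abs() < 1e-10);
            }
        }
    }

    #[test]
    fn test_solve_linear_system() {
        let a = array![[2.0, 0.0], [0.0, 4.0]];
        let b = array![2.0, 8.0];
        let x = solve_linear_system(&a.view(), &b.view()).unwrap();
        assert!((x[0] - 1.0).abs() < 1e-10);
        assert!((x[1] - 2.0).abs() < 1e-10);
    }

    #[test]
    fn test_cache_oblivious_matrix_mult_recursive() {
        // A 4x4 identity times b with threshold 2 exercises every split path.
        let identity = Array2::<f64>::eye(4);
        let b = Array2::from_shape_fn((4, 4), |(i, j)| (i * 4 + j) as f64);
        let result = cache_oblivious_matrix_mult(&identity.view(), &b.view(), 2).unwrap();
        for i in 0..4 {
            for j in 0..4 {
                assert!((result[[i, j]] - b[[i, j]]).abs() < 1e-10);
            }
        }
    }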
}