1use crate::error::{StatsError, StatsResult};
7use scirs2_core::ndarray::{Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Data, Ix2};
8use scirs2_core::numeric::{Float, NumCast, One, Zero};
9use scirs2_core::simd_ops::SimdUnifiedOps;
10use std::cmp::Ordering;
11use std::collections::VecDeque;
12use std::sync::{Arc, Mutex};
13
/// Tunable limits that guide memory-aware algorithm selection in this module.
#[derive(Debug, Clone)]
pub struct MemoryConstraints {
    /// Hard upper bound, in bytes, on the memory an operation should use.
    pub max_memory_bytes: usize,
    /// Preferred number of elements per processing chunk.
    pub chunksize: usize,
    /// Whether memory-mapped I/O may be used.
    /// NOTE(review): not consulted anywhere in this module — confirm callers use it.
    pub use_memory_mapping: bool,
    /// Whether garbage-collection hints should be emitted.
    /// NOTE(review): not consulted anywhere in this module — confirm callers use it.
    pub enable_gc_hints: bool,
}
26
27impl Default for MemoryConstraints {
28 fn default() -> Self {
29 Self {
31 max_memory_bytes: 1_024 * 1_024 * 1_024,
32 chunksize: 64 * 1024,
33 use_memory_mapping: true,
34 enable_gc_hints: true,
35 }
36 }
37}
38
/// Tracks memory usage and a rolling operation history so chunk sizes can be
/// adapted at runtime. All counters are behind `Arc<Mutex<..>>` so the
/// manager can be shared across threads.
pub struct AdaptiveMemoryManager {
    // Static limits configured by the caller.
    constraints: MemoryConstraints,
    // Bytes currently believed to be in use.
    current_usage: Arc<Mutex<usize>>,
    // Largest `memory_used` value recorded so far.
    peak_usage: Arc<Mutex<usize>>,
    // Bounded window (capacity 100) of recently completed operations.
    operation_history: Arc<Mutex<VecDeque<OperationMetrics>>>,
}
46
/// Per-operation bookkeeping recorded by `AdaptiveMemoryManager`.
#[derive(Debug, Clone)]
pub struct OperationMetrics {
    // Human-readable tag identifying the operation (e.g. "pca_memory_efficient").
    operation_type: String,
    // Estimated number of bytes the operation used.
    memory_used: usize,
    // Wall-clock duration of the operation.
    processing_time: std::time::Duration,
    // Chunk size the operation ran with.
    chunksize_used: usize,
}
54
55impl AdaptiveMemoryManager {
56 pub fn new(constraints: MemoryConstraints) -> Self {
57 Self {
58 constraints,
59 current_usage: Arc::new(Mutex::new(0)),
60 peak_usage: Arc::new(Mutex::new(0)),
61 operation_history: Arc::new(Mutex::new(VecDeque::with_capacity(100))),
62 }
63 }
64
65 pub fn get_optimal_chunksize(&self, datasize: usize, elementsize: usize) -> usize {
67 let current_usage = *self.current_usage.lock().expect("Operation failed");
68 let available_memory = self
69 .constraints
70 .max_memory_bytes
71 .saturating_sub(current_usage);
72
73 let max_chunk_memory = available_memory * 4 / 5;
75 let max_chunk_elements = max_chunk_memory / elementsize;
76
77 let optimalsize = max_chunk_elements
79 .min(datasize)
80 .min(self.constraints.chunksize);
81 optimalsize.next_power_of_two() / 2 }
83
84 pub fn record_operation(&self, metrics: OperationMetrics) {
86 let mut history = self.operation_history.lock().expect("Operation failed");
87
88 if history.len() >= 100 {
90 history.pop_front();
91 }
92
93 history.push_back(metrics.clone());
94
95 let mut peak = self.peak_usage.lock().expect("Operation failed");
97 *peak = (*peak).max(metrics.memory_used);
98 }
99
100 pub fn get_statistics(&self) -> MemoryStatistics {
102 let current_usage = *self.current_usage.lock().expect("Operation failed");
103 let peak_usage = *self.peak_usage.lock().expect("Operation failed");
104 let history = self.operation_history.lock().expect("Operation failed");
105
106 let avg_memory_per_op = if !history.is_empty() {
107 history.iter().map(|m| m.memory_used).sum::<usize>() / history.len()
108 } else {
109 0
110 };
111
112 MemoryStatistics {
113 current_usage,
114 peak_usage,
115 avg_memory_per_operation: avg_memory_per_op,
116 operations_completed: history.len(),
117 memory_efficiency: if peak_usage > 0 {
118 (avg_memory_per_op as f64 / peak_usage as f64) * 100.0
119 } else {
120 100.0
121 },
122 }
123 }
124}
125
/// Snapshot of the manager's usage counters, as returned by
/// `AdaptiveMemoryManager::get_statistics`.
#[derive(Debug, Clone)]
pub struct MemoryStatistics {
    /// Bytes currently believed to be in use.
    pub current_usage: usize,
    /// Largest per-operation memory usage recorded so far.
    pub peak_usage: usize,
    /// Mean of `memory_used` over the retained operation history.
    pub avg_memory_per_operation: usize,
    /// Number of operations currently retained in the history window.
    pub operations_completed: usize,
    /// Average-to-peak usage ratio as a percentage (100.0 when no peak yet).
    pub memory_efficiency: f64,
}
134
135#[allow(dead_code)]
140pub fn corrcoef_memory_aware<F>(
141 data: &ArrayView2<F>,
142 method: &str,
143 manager: &mut AdaptiveMemoryManager,
144) -> StatsResult<Array2<F>>
145where
146 F: Float
147 + NumCast
148 + SimdUnifiedOps
149 + Zero
150 + One
151 + Copy
152 + Send
153 + Sync
154 + std::iter::Sum<F>
155 + std::fmt::Debug
156 + std::fmt::Display
157 + 'static,
158{
159 let start_time = std::time::Instant::now();
160
161 checkarray_finite_2d(data, "data")?;
162
163 let (n_obs, n_vars) = data.dim();
164 let elementsize = std::mem::size_of::<F>();
165
166 let matrix_memory = n_vars * n_vars * elementsize;
168 let temp_memory = n_obs * elementsize * 2; let total_estimated = matrix_memory + temp_memory;
170
171 let mut corr_matrix = Array2::<F>::zeros((n_vars, n_vars));
172
173 for i in 0..n_vars {
175 corr_matrix[[i, i]] = F::one();
176 }
177
178 if total_estimated <= manager.constraints.max_memory_bytes {
179 corr_matrix = compute_correlation_matrix_standard(data, method)?;
181 } else {
182 let blocksize = manager.get_optimal_chunksize(n_vars, elementsize * n_vars);
184 corr_matrix = compute_correlation_matrix_blocked(data, method, blocksize)?;
185 }
186
187 let metrics = OperationMetrics {
189 operation_type: format!("corrcoef_memory_aware_{}", method),
190 memory_used: total_estimated,
191 processing_time: start_time.elapsed(),
192 chunksize_used: n_vars,
193 };
194 manager.record_operation(metrics);
195
196 Ok(corr_matrix)
197}
198
199#[allow(dead_code)]
201pub fn cache_oblivious_matrix_mult<F>(
202 a: &ArrayView2<F>,
203 b: &ArrayView2<F>,
204 threshold: usize,
205) -> StatsResult<Array2<F>>
206where
207 F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Display,
208{
209 let (m, n) = a.dim();
210 let (n2, p) = b.dim();
211
212 if n != n2 {
213 return Err(StatsError::DimensionMismatch(
214 "Matrix dimensions don't match for multiplication".to_string(),
215 ));
216 }
217
218 let mut result = Array2::<F>::zeros((m, p));
219
220 if m <= threshold && n <= threshold && p <= threshold {
221 for i in 0..m {
223 for j in 0..p {
224 let mut sum = F::zero();
225 for k in 0..n {
226 sum = sum + a[[i, k]] * b[[k, j]];
227 }
228 result[[i, j]] = sum;
229 }
230 }
231 } else {
232 let mid_m = m / 2;
234 let _mid_n = n / 2;
235 let _mid_p = p / 2;
236
237 if m > threshold {
240 let a_top = a.slice(scirs2_core::ndarray::s![..mid_m, ..]);
241 let a_bottom = a.slice(scirs2_core::ndarray::s![mid_m.., ..]);
242
243 let result_top = cache_oblivious_matrix_mult(&a_top, b, threshold)?;
244 let result_bottom = cache_oblivious_matrix_mult(&a_bottom, b, threshold)?;
245
246 result
247 .slice_mut(scirs2_core::ndarray::s![..mid_m, ..])
248 .assign(&result_top);
249 result
250 .slice_mut(scirs2_core::ndarray::s![mid_m.., ..])
251 .assign(&result_bottom);
252 }
253 }
254
255 Ok(result)
256}
257
258#[allow(dead_code)]
260pub fn streaming_covariance_matrix<'a, F>(
261 data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
262 manager: &mut AdaptiveMemoryManager,
263) -> StatsResult<Array2<F>>
264where
265 F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
266{
267 let start_time = std::time::Instant::now();
268
269 let mut n_vars = 0;
270 let mut total_obs = 0;
271 let mut sum_x = Array1::<F>::zeros(0);
272 let mut sum_x2 = Array2::<F>::zeros((0, 0));
273 let mut initialized = false;
274
275 for chunk in data_chunks {
276 let (chunk_obs, chunk_vars) = chunk.dim();
277
278 if !initialized {
279 n_vars = chunk_vars;
280 sum_x = Array1::<F>::zeros(n_vars);
281 sum_x2 = Array2::<F>::zeros((n_vars, n_vars));
282 initialized = true;
283 } else if chunk_vars != n_vars {
284 return Err(StatsError::DimensionMismatch(
285 "All _chunks must have the same number of variables".to_string(),
286 ));
287 }
288
289 total_obs += chunk_obs;
290
291 for i in 0..chunk_obs {
293 let row = chunk.row(i);
294
295 for j in 0..n_vars {
297 sum_x[j] = sum_x[j] + row[j];
298 }
299
300 for j in 0..n_vars {
302 for k in j..n_vars {
303 let product = row[j] * row[k];
304 sum_x2[[j, k]] = sum_x2[[j, k]] + product;
305 if j != k {
306 sum_x2[[k, j]] = sum_x2[[k, j]] + product;
307 }
308 }
309 }
310 }
311 }
312
313 if total_obs == 0 {
314 return Err(StatsError::InvalidArgument("No data provided".to_string()));
315 }
316
317 let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
319 let n_f = F::from(total_obs).expect("Failed to convert to float");
320
321 for i in 0..n_vars {
322 for j in 0..n_vars {
323 let mean_i = sum_x[i] / n_f;
324 let mean_j = sum_x[j] / n_f;
325 let cov = (sum_x2[[i, j]] / n_f) - (mean_i * mean_j);
326 cov_matrix[[i, j]] = cov;
327 }
328 }
329
330 let memory_used = (n_vars * n_vars + n_vars) * std::mem::size_of::<F>();
332 let metrics = OperationMetrics {
333 operation_type: "streaming_covariance_matrix".to_string(),
334 memory_used,
335 processing_time: start_time.elapsed(),
336 chunksize_used: n_vars,
337 };
338 manager.record_operation(metrics);
339
340 Ok(cov_matrix)
341}
342
343#[allow(dead_code)]
345pub fn pca_memory_efficient<F>(
346 data: &ArrayView2<F>,
347 n_components: Option<usize>,
348 manager: &mut AdaptiveMemoryManager,
349) -> StatsResult<PCAResult<F>>
350where
351 F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
352{
353 let start_time = std::time::Instant::now();
354
355 let (n_obs, n_vars) = data.dim();
356 let n_components = n_components.unwrap_or(n_vars.min(n_obs));
357
358 let mut means = Array1::<F>::zeros(n_vars);
360 for i in 0..n_vars {
361 let column = data.column(i);
362 means[i] = column.iter().fold(F::zero(), |acc, &x| acc + x)
363 / F::from(n_obs).expect("Failed to convert to float");
364 }
365
366 let centereddata_memory = n_obs * n_vars * std::mem::size_of::<F>();
368
369 if centereddata_memory <= manager.constraints.max_memory_bytes / 2 {
370 let mut centereddata = Array2::<F>::zeros((n_obs, n_vars));
372 for i in 0..n_obs {
373 for j in 0..n_vars {
374 centereddata[[i, j]] = data[[i, j]] - means[j];
375 }
376 }
377
378 let cov_matrix = compute_covariance_from_centered(¢ereddata.view())?;
380
381 let (eigenvectors, eigenvalues) =
383 compute_eigendecomposition(&cov_matrix.view(), n_components)?;
384
385 let transformed = matrix_multiply(¢ereddata.view(), &eigenvectors.view())?;
387
388 let result = PCAResult {
389 components: eigenvectors,
390 explained_variance: eigenvalues,
391 transformeddata: transformed,
392 mean: means,
393 };
394
395 let metrics = OperationMetrics {
396 operation_type: "pca_memory_efficient".to_string(),
397 memory_used: centereddata_memory,
398 processing_time: start_time.elapsed(),
399 chunksize_used: n_vars,
400 };
401 manager.record_operation(metrics);
402
403 Ok(result)
404 } else {
405 incremental_pca(data, n_components, &means, manager)
407 }
408}
409
/// Output of the PCA routines in this module.
#[derive(Debug, Clone)]
pub struct PCAResult<F> {
    /// Principal axes, one column per component (n_vars × n_components).
    pub components: Array2<F>,
    /// Variance attributed to each component.
    pub explained_variance: Array1<F>,
    /// Input data projected onto the components.
    /// NOTE(review): left empty (0×0) by single-pass streaming PCA — check before use.
    pub transformeddata: Array2<F>,
    /// Per-variable means used for centering.
    pub mean: Array1<F>,
}
417
/// Single-pass streaming PCA over row chunks.
///
/// Maintains running estimates of the feature means and covariance matrix,
/// then eigendecomposes the final covariance estimate to obtain the
/// components.
///
/// NOTE(review): the covariance recurrence below rescales the old estimate by
/// (n-1)/n and adds the new observation's outer product computed with the
/// already-updated means; this is an online approximation rather than exact
/// Welford-style covariance — confirm the accepted tolerance with callers.
///
/// The returned `transformeddata` is intentionally empty: projecting the data
/// would require a second pass, which a single-pass stream cannot make.
#[allow(dead_code)]
pub fn streaming_pca_enhanced<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
    n_components: usize,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    // Running state; allocated for real once the first chunk fixes n_features.
    let mut n_samples_ = 0;
    let mut n_features = 0;
    let mut running_mean = Array1::<F>::zeros(0);
    let mut running_cov = Array2::<F>::zeros((0, 0));
    let mut initialized = false;

    for chunk in data_chunks {
        let (chunk_samples, chunk_features) = chunk.dim();

        if !initialized {
            n_features = chunk_features;
            running_mean = Array1::<F>::zeros(n_features);
            running_cov = Array2::<F>::zeros((n_features, n_features));
            initialized = true;
        } else if chunk_features != n_features {
            return Err(StatsError::DimensionMismatch(
                "All _chunks must have same number of features".to_string(),
            ));
        }

        for i in 0..chunk_samples {
            n_samples_ += 1;
            let row = chunk.row(i);

            // Online mean update: mean += (x − mean) / n.
            let n_f = F::from(n_samples_).expect("Failed to convert to float");
            for j in 0..n_features {
                let delta = row[j] - running_mean[j];
                running_mean[j] = running_mean[j] + delta / n_f;
            }

            // Online covariance update over the upper triangle, mirrored to
            // keep the matrix symmetric.
            if n_samples_ > 1 {
                for j in 0..n_features {
                    for k in j..n_features {
                        let prod = (row[j] - running_mean[j]) * (row[k] - running_mean[k]);
                        running_cov[[j, k]] = running_cov[[j, k]]
                            * F::from(n_samples_ - 1).expect("Failed to convert to float")
                            / n_f
                            + prod / n_f;
                        if j != k {
                            running_cov[[k, j]] = running_cov[[j, k]];
                        }
                    }
                }
            }
        }
    }

    if n_samples_ == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Principal axes and variances from the accumulated covariance estimate.
    let (components, explained_variance) =
        compute_eigendecomposition(&running_cov.view(), n_components)?;

    // Only the persistent accumulators (covariance matrix + mean vector)
    // are counted; chunk storage belongs to the caller.
    let memory_used =
        n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_pca_enhanced".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(PCAResult {
        components,
        explained_variance,
        // Single-pass streams cannot be re-projected here; see doc comment.
        transformeddata: Array2::zeros((0, 0)),
        mean: running_mean,
    })
}
506
/// Builds a histogram from streamed data, using reservoir sampling to bound
/// memory and the Freedman–Diaconis rule to choose the bin count.
///
/// Returns `(bin_edges, counts)` where `bin_edges` has `n_bins + 1` entries.
///
/// NOTE(review): the counts are computed from the retained reservoir sample,
/// not the full stream, and the bandwidth formula mixes the sample IQR with
/// the full-stream count — confirm this approximation is intended.
#[allow(dead_code)]
pub fn streaming_histogram_adaptive<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<(Array1<F>, Array1<usize>)>
where
    F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    // Track global extrema plus a bounded reservoir of representative values.
    let mut min_val = F::infinity();
    let mut max_val = F::neg_infinity();
    let mut total_count = 0;
    let mut values = Vec::new();

    for chunk in data_chunks {
        for &value in chunk.iter() {
            if value < min_val {
                min_val = value;
            }
            if value > max_val {
                max_val = value;
            }
            total_count += 1;

            if values.len() < manager.constraints.chunksize {
                // Reservoir not yet full: keep every value.
                values.push(value);
            } else {
                // Classic reservoir sampling: replace a random slot with
                // probability len/total_count.
                let j = {
                    use scirs2_core::random::Rng;
                    let mut rng = scirs2_core::random::thread_rng();
                    rng.random_range(0..total_count)
                };
                if j < values.len() {
                    values[j] = value;
                }
            }
        }
    }

    if total_count == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Freedman–Diaconis bandwidth h = 2·IQR / n^(1/3), IQR from the sample.
    values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
    let q75_idx = (values.len() as f64 * 0.75) as usize;
    let q25_idx = (values.len() as f64 * 0.25) as usize;
    let iqr = values[q75_idx] - values[q25_idx];
    let h = F::from(2.0).expect("Failed to convert constant to float") * iqr
        / F::from(total_count as f64)
            .expect("Operation failed")
            .powf(F::from(1.0 / 3.0).expect("Failed to convert to float"));
    // Clamp the bin count to [10, 1000]; fall back to 50 when h is degenerate
    // (e.g. zero IQR).
    let n_bins = if h > F::zero() {
        ((max_val - min_val) / h)
            .to_usize()
            .unwrap_or(50)
            .max(10)
            .min(1000)
    } else {
        50
    };

    // Uniform bin edges spanning [min_val, max_val].
    let bin_width = (max_val - min_val) / F::from(n_bins).expect("Failed to convert to float");
    let mut bin_edges = Array1::<F>::zeros(n_bins + 1);
    let mut counts = Array1::<usize>::zeros(n_bins);

    for i in 0..=n_bins {
        bin_edges[i] = min_val + F::from(i).expect("Failed to convert to float") * bin_width;
    }

    // Histogram the reservoir sample (not the full stream); values exactly at
    // max_val are clamped into the last bin.
    for &value in values.iter() {
        if value >= min_val && value <= max_val {
            let bin_idx = ((value - min_val) / bin_width)
                .to_usize()
                .unwrap_or(0)
                .min(n_bins - 1);
            counts[bin_idx] += 1;
        }
    }

    let memory_used = n_bins * (std::mem::size_of::<F>() + std::mem::size_of::<usize>());
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_histogram_adaptive".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok((bin_edges, counts))
}
604
605#[allow(dead_code)]
607pub fn streaming_quantiles_p2<'a, F>(
608 data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
609 quantiles: &[f64],
610 manager: &mut AdaptiveMemoryManager,
611) -> StatsResult<Array1<F>>
612where
613 F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
614{
615 let start_time = std::time::Instant::now();
616
617 let mut p2_estimators: Vec<P2Estimator<F>> =
618 quantiles.iter().map(|&q| P2Estimator::new(q)).collect();
619
620 let mut total_count = 0;
621
622 for chunk in data_chunks {
623 for &value in chunk.iter() {
624 total_count += 1;
625 for estimator in &mut p2_estimators {
626 estimator.update(value);
627 }
628 }
629 }
630
631 if total_count == 0 {
632 return Err(StatsError::InvalidArgument("No data provided".to_string()));
633 }
634
635 let results: Vec<F> = p2_estimators.iter().map(|est| est.quantile()).collect();
636
637 let memory_used = quantiles.len() * std::mem::size_of::<P2Estimator<F>>();
638 manager.record_operation(OperationMetrics {
639 operation_type: "streaming_quantiles_p2".to_string(),
640 memory_used,
641 processing_time: start_time.elapsed(),
642 chunksize_used: manager.constraints.chunksize,
643 });
644
645 Ok(Array1::from_vec(results))
646}
647
/// State for the P² (piecewise-parabolic) streaming quantile estimator of
/// Jain & Chlamtac: five markers whose heights approximate the min, the
/// target quantile and its half-way neighbors, and the max.
#[derive(Debug, Clone)]
struct P2Estimator<F> {
    // Target quantile in [0, 1].
    quantile: f64,
    // Marker heights (estimated order statistics).
    markers: [F; 5],
    // Actual marker positions (1-based ranks, stored as f64).
    positions: [f64; 5],
    // Ideal marker positions for the target quantile.
    desired_positions: [f64; 5],
    // Per-observation increments applied to the desired positions.
    increment: [f64; 5],
    // Number of observations seen so far.
    count: usize,
}
658
659impl<F> P2Estimator<F>
660where
661 F: Float + NumCast + Copy + PartialOrd + std::fmt::Display,
662{
663 fn new(quantile: f64) -> Self {
664 let mut estimator = Self {
665 quantile,
666 markers: [F::zero(); 5],
667 positions: [1.0, 2.0, 3.0, 4.0, 5.0],
668 desired_positions: [
669 1.0,
670 1.0 + 2.0 * quantile,
671 1.0 + 4.0 * quantile,
672 3.0 + 2.0 * quantile,
673 5.0,
674 ],
675 increment: [0.0, quantile / 2.0, quantile, (1.0 + quantile) / 2.0, 1.0],
676 count: 0,
677 };
678
679 estimator.desired_positions[1] = 1.0 + 2.0 * quantile;
681 estimator.desired_positions[2] = 1.0 + 4.0 * quantile;
682 estimator.desired_positions[3] = 3.0 + 2.0 * quantile;
683
684 estimator
685 }
686
687 fn update(&mut self, value: F) {
688 self.count += 1;
689
690 if self.count <= 5 {
691 self.markers[self.count - 1] = value;
693 if self.count == 5 {
694 self.markers
695 .sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
696 }
697 return;
698 }
699
700 let mut k = 0;
702 for i in 0..4 {
703 if value >= self.markers[i] {
704 k = i + 1;
705 }
706 }
707
708 if k == 0 {
709 self.markers[0] = value;
710 k = 1;
711 } else if k == 5 {
712 self.markers[4] = value;
713 k = 4;
714 }
715
716 for i in k..5 {
718 self.positions[i] += 1.0;
719 }
720
721 for i in 0..5 {
723 self.desired_positions[i] += self.increment[i];
724 }
725
726 for i in 1..4 {
728 let d = self.desired_positions[i] - self.positions[i];
729 if (d >= 1.0 && self.positions[i + 1] - self.positions[i] > 1.0)
730 || (d <= -1.0 && self.positions[i - 1] - self.positions[i] < -1.0)
731 {
732 let d_sign = if d >= 0.0 { 1.0 } else { -1.0 };
733 let new_height = self.parabolic_prediction(i, d_sign);
734
735 if self.markers[i - 1] < new_height && new_height < self.markers[i + 1] {
736 self.markers[i] = new_height;
737 } else {
738 self.markers[i] = self.linear_prediction(i, d_sign);
739 }
740 self.positions[i] += d_sign;
741 }
742 }
743 }
744
745 fn parabolic_prediction(&self, i: usize, d: f64) -> F {
746 let qi = self.markers[i];
747 let qi_prev = self.markers[i - 1];
748 let qi_next = self.markers[i + 1];
749 let ni = self.positions[i];
750 let ni_prev = self.positions[i - 1];
751 let ni_next = self.positions[i + 1];
752
753 let d_f = F::from(d).expect("Failed to convert to float");
754 let ni_f = F::from(ni).expect("Failed to convert to float");
755 let ni_prev_f = F::from(ni_prev).expect("Failed to convert to float");
756 let ni_next_f = F::from(ni_next).expect("Failed to convert to float");
757
758 let a = d_f / (ni_next_f - ni_prev_f);
759 let b1 = (ni_f - ni_prev_f + d_f) * (qi_next - qi) / (ni_next_f - ni_f);
760 let b2 = (ni_next_f - ni_f - d_f) * (qi - qi_prev) / (ni_f - ni_prev_f);
761
762 qi + a * (b1 + b2)
763 }
764
765 fn linear_prediction(&self, i: usize, d: f64) -> F {
766 if d > 0.0 {
767 let qi_next = self.markers[i + 1];
768 let qi = self.markers[i];
769 let ni_next = self.positions[i + 1];
770 let ni = self.positions[i];
771 qi + (qi_next - qi) * F::from(d / (ni_next - ni)).expect("Operation failed")
772 } else {
773 let qi = self.markers[i];
774 let qi_prev = self.markers[i - 1];
775 let ni = self.positions[i];
776 let ni_prev = self.positions[i - 1];
777 qi + (qi_prev - qi) * F::from(-d / (ni - ni_prev)).expect("Operation failed")
778 }
779 }
780
781 fn quantile(&self) -> F {
782 if self.count < 5 {
783 let mut sorted = self.markers[..self.count.min(5)].to_vec();
785 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
786 let idx = (self.quantile * (sorted.len() - 1) as f64) as usize;
787 sorted[idx]
788 } else {
789 self.markers[2] }
791 }
792}
793
794#[allow(dead_code)]
796pub fn streaming_regression_enhanced<'a, F>(
797 data_chunks: impl Iterator<Item = (ArrayView2<'a, F>, ArrayView1<'a, F>)>,
798 regularization: F,
799 manager: &mut AdaptiveMemoryManager,
800) -> StatsResult<Array1<F>>
801where
802 F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
803{
804 let start_time = std::time::Instant::now();
805
806 let mut xtx = Array2::<F>::zeros((0, 0));
807 let mut xty = Array1::<F>::zeros(0);
808 let mut n_samples_ = 0;
809 let mut n_features = 0;
810 let mut initialized = false;
811
812 for (x_chunk, y_chunk) in data_chunks {
814 let (chunk_samples, chunk_features) = x_chunk.dim();
815
816 if !initialized {
817 n_features = chunk_features;
818 xtx = Array2::<F>::zeros((n_features, n_features));
819 xty = Array1::<F>::zeros(n_features);
820 initialized = true;
821 } else if chunk_features != n_features {
822 return Err(StatsError::DimensionMismatch(
823 "All _chunks must have same number of features".to_string(),
824 ));
825 }
826
827 if y_chunk.len() != chunk_samples {
828 return Err(StatsError::DimensionMismatch(
829 "X and y must have same number of samples".to_string(),
830 ));
831 }
832
833 n_samples_ += chunk_samples;
834
835 for i in 0..n_features {
837 for j in i..n_features {
838 let mut sum = F::zero();
839 for k in 0..chunk_samples {
840 sum = sum + x_chunk[[k, i]] * x_chunk[[k, j]];
841 }
842 xtx[[i, j]] = xtx[[i, j]] + sum;
843 if i != j {
844 xtx[[j, i]] = xtx[[i, j]];
845 }
846 }
847 }
848
849 for i in 0..n_features {
851 let mut sum = F::zero();
852 for k in 0..chunk_samples {
853 sum = sum + x_chunk[[k, i]] * y_chunk[k];
854 }
855 xty[i] = xty[i] + sum;
856 }
857 }
858
859 if n_samples_ == 0 {
860 return Err(StatsError::InvalidArgument("No data provided".to_string()));
861 }
862
863 for i in 0..n_features {
865 xtx[[i, i]] = xtx[[i, i]] + regularization;
866 }
867
868 let coefficients = solve_linear_system(&xtx.view(), &xty.view())?;
871
872 let memory_used =
873 n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
874 manager.record_operation(OperationMetrics {
875 operation_type: "streaming_regression_enhanced".to_string(),
876 memory_used,
877 processing_time: start_time.elapsed(),
878 chunksize_used: manager.constraints.chunksize,
879 });
880
881 Ok(coefficients)
882}
883
884#[allow(dead_code)]
886fn solve_linear_system<F>(a: &ArrayView2<F>, b: &ArrayView1<F>) -> StatsResult<Array1<F>>
887where
888 F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
889{
890 let n = a.nrows();
891 if a.ncols() != n || b.len() != n {
892 return Err(StatsError::DimensionMismatch(
893 "Matrix dimensions incompatible".to_string(),
894 ));
895 }
896
897 let mut aug = Array2::<F>::zeros((n, n + 1));
899
900 for i in 0..n {
902 for j in 0..n {
903 aug[[i, j]] = a[[i, j]];
904 }
905 aug[[i, n]] = b[i];
906 }
907
908 for i in 0..n {
910 let mut max_row = i;
912 for k in (i + 1)..n {
913 if aug[[k, i]].abs() > aug[[max_row, i]].abs() {
914 max_row = k;
915 }
916 }
917
918 if max_row != i {
920 for j in 0..=n {
921 let temp = aug[[i, j]];
922 aug[[i, j]] = aug[[max_row, j]];
923 aug[[max_row, j]] = temp;
924 }
925 }
926
927 let pivot = aug[[i, i]];
929 if pivot.abs() < F::from(1e-12).expect("Failed to convert constant to float") {
930 return Err(StatsError::ComputationError(
931 "Matrix is singular".to_string(),
932 ));
933 }
934
935 for j in i..=n {
936 aug[[i, j]] = aug[[i, j]] / pivot;
937 }
938
939 for k in 0..n {
941 if k != i {
942 let factor = aug[[k, i]];
943 for j in i..=n {
944 aug[[k, j]] = aug[[k, j]] - factor * aug[[i, j]];
945 }
946 }
947 }
948 }
949
950 let mut solution = Array1::<F>::zeros(n);
952 for i in 0..n {
953 solution[i] = aug[[i, n]];
954 }
955
956 Ok(solution)
957}
958
/// Computes the full correlation matrix in one shot by delegating to the
/// crate-level `corrcoef`; used when the estimated footprint fits the budget.
#[allow(dead_code)]
fn compute_correlation_matrix_standard<F>(
    data: &ArrayView2<F>,
    method: &str,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + Zero
        + One
        + Copy
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display
        + Send
        + Sync
        + scirs2_core::simd_ops::SimdUnifiedOps
        + 'static,
{
    crate::corrcoef(data, method)
}
983
984#[allow(dead_code)]
985fn compute_correlation_matrix_blocked<F>(
986 data: &ArrayView2<F>,
987 method: &str,
988 blocksize: usize,
989) -> StatsResult<Array2<F>>
990where
991 F: Float
992 + NumCast
993 + Zero
994 + One
995 + Copy
996 + std::iter::Sum<F>
997 + std::fmt::Debug
998 + std::fmt::Display
999 + scirs2_core::simd_ops::SimdUnifiedOps
1000 + 'static,
1001{
1002 let (_, n_vars) = data.dim();
1003 let mut corr_matrix = Array2::<F>::zeros((n_vars, n_vars));
1004
1005 for i in 0..n_vars {
1007 corr_matrix[[i, i]] = F::one();
1008 }
1009
1010 for i_block in (0..n_vars).step_by(blocksize) {
1012 let i_end = (i_block + blocksize).min(n_vars);
1013
1014 for j_block in (i_block..n_vars).step_by(blocksize) {
1015 let j_end = (j_block + blocksize).min(n_vars);
1016
1017 for i in i_block..i_end {
1019 for j in j_block.max(i + 1)..j_end {
1020 let col_i = data.column(i);
1021 let col_j = data.column(j);
1022
1023 let corr = match method {
1024 "pearson" => crate::pearson_r(&col_i, &col_j)?,
1025 "spearman" => crate::spearman_r(&col_i, &col_j)?,
1026 "kendall" => crate::kendall_tau(&col_i, &col_j, "b")?,
1027 _ => {
1028 return Err(StatsError::InvalidArgument(format!(
1029 "Unknown method: {}",
1030 method
1031 )))
1032 }
1033 };
1034
1035 corr_matrix[[i, j]] = corr;
1036 corr_matrix[[j, i]] = corr;
1037 }
1038 }
1039 }
1040 }
1041
1042 Ok(corr_matrix)
1043}
1044
1045#[allow(dead_code)]
1046fn compute_covariance_from_centered<F>(data: &ArrayView2<F>) -> StatsResult<Array2<F>>
1047where
1048 F: Float + NumCast + Zero + Copy + std::fmt::Display,
1049{
1050 let (n_obs, n_vars) = data.dim();
1051 let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
1052 let n_f = F::from(n_obs - 1).expect("Failed to convert to float"); for i in 0..n_vars {
1055 for j in i..n_vars {
1056 let mut cov = F::zero();
1057 for k in 0..n_obs {
1058 cov = cov + data[[k, i]] * data[[k, j]];
1059 }
1060 cov = cov / n_f;
1061 cov_matrix[[i, j]] = cov;
1062 cov_matrix[[j, i]] = cov;
1063 }
1064 }
1065
1066 Ok(cov_matrix)
1067}
1068
/// Computes the top `n_components` eigenpairs of a symmetric matrix using
/// power iteration with Gram-Schmidt deflation against previously found
/// eigenvectors.
///
/// NOTE(review): the convergence test compares vector entries directly, so a
/// sign flip between iterations can prevent early convergence; the loop then
/// simply runs all 100 sweeps — confirm this is acceptable for callers.
#[allow(dead_code)]
fn compute_eigendecomposition<F>(
    matrix: &ArrayView2<F>,
    n_components: usize,
) -> StatsResult<(Array2<F>, Array1<F>)>
where
    F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
{
    let n = matrix.dim().0;
    // Never compute more components than the matrix dimension allows.
    let n_components = n_components.min(n);

    let mut eigenvalues = Array1::<F>::zeros(n_components);
    let mut eigenvectors = Array2::<F>::zeros((n, n_components));

    for k in 0..n_components {
        // Start each component from the all-ones vector.
        let mut v = Array1::<F>::ones(n);

        // Power iteration, capped at 100 sweeps.
        for _ in 0..100 {
            // new_v = matrix · v
            let mut new_v = Array1::<F>::zeros(n);

            for i in 0..n {
                let mut sum = F::zero();
                for j in 0..n {
                    sum = sum + matrix[[i, j]] * v[j];
                }
                new_v[i] = sum;
            }

            // Deflation: project out the components already found so the
            // iteration converges to a new eigenvector.
            for prev_k in 0..k {
                let mut dot_product = F::zero();
                for i in 0..n {
                    dot_product = dot_product + new_v[i] * eigenvectors[[i, prev_k]];
                }

                for i in 0..n {
                    new_v[i] = new_v[i] - dot_product * eigenvectors[[i, prev_k]];
                }
            }

            // Normalize to unit length (skipped when the vector collapsed
            // to ~zero, e.g. after deflation removed everything).
            let mut norm = F::zero();
            for i in 0..n {
                norm = norm + new_v[i] * new_v[i];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for i in 0..n {
                    new_v[i] = new_v[i] / norm;
                }
            }

            // Element-wise convergence check against a fixed 1e-6 tolerance.
            let mut converged = true;
            for i in 0..n {
                if (new_v[i] - v[i]).abs()
                    > F::from(1e-6).expect("Failed to convert constant to float")
                {
                    converged = false;
                    break;
                }
            }

            v = new_v;

            if converged {
                break;
            }
        }

        // Rayleigh quotient vᵀ·M·v gives the eigenvalue for unit-norm v.
        let mut eigenvalue = F::zero();
        for i in 0..n {
            let mut sum = F::zero();
            for j in 0..n {
                sum = sum + matrix[[i, j]] * v[j];
            }
            eigenvalue = eigenvalue + v[i] * sum;
        }

        eigenvalues[k] = eigenvalue;
        for i in 0..n {
            eigenvectors[[i, k]] = v[i];
        }
    }

    Ok((eigenvectors, eigenvalues))
}
1164
1165#[allow(dead_code)]
1166fn matrix_multiply<F>(a: &ArrayView2<F>, b: &ArrayView2<F>) -> StatsResult<Array2<F>>
1167where
1168 F: Float + NumCast + Zero + Copy + std::fmt::Display,
1169{
1170 let (m, n) = a.dim();
1171 let (n2, p) = b.dim();
1172
1173 if n != n2 {
1174 return Err(StatsError::DimensionMismatch(
1175 "Matrix dimensions don't match".to_string(),
1176 ));
1177 }
1178
1179 let mut result = Array2::<F>::zeros((m, p));
1180
1181 for i in 0..m {
1182 for j in 0..p {
1183 let mut sum = F::zero();
1184 for k in 0..n {
1185 sum = sum + a[[i, k]] * b[[k, j]];
1186 }
1187 result[[i, j]] = sum;
1188 }
1189 }
1190
1191 Ok(result)
1192}
1193
/// Batch-wise (incremental) PCA fallback used when a centered copy of the
/// data does not fit in memory. Each batch performs one power-iteration-style
/// refinement of every component, and the full data set is projected at the
/// end.
///
/// NOTE(review): each component is re-estimated from the current batch only
/// (no blending with earlier batches), so results depend on batch order and
/// size, and `explained_variance` reflects the last batch — confirm this
/// approximation is intended.
#[allow(dead_code)]
fn incremental_pca<F>(
    data: &ArrayView2<F>,
    n_components: usize,
    means: &Array1<F>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
{
    let (n_obs, n_vars) = data.dim();
    let n_components = n_components.min(n_vars);

    // Batch size chosen from the remaining memory budget.
    let batchsize = manager.get_optimal_chunksize(n_obs, std::mem::size_of::<F>() * n_vars);

    // Initialize components to distinct unit basis vectors.
    let mut components = Array2::<F>::zeros((n_vars, n_components));
    for i in 0..n_components {
        components[[i % n_vars, i]] = F::one();
    }

    let mut explained_variance = Array1::<F>::zeros(n_components);
    let mut _n_samples_seen = 0;

    for batch_start in (0..n_obs).step_by(batchsize) {
        let batch_end = (batch_start + batchsize).min(n_obs);
        let batch = data.slice(scirs2_core::ndarray::s![batch_start..batch_end, ..]);

        // Center the batch with the precomputed global means.
        let mut centered_batch = Array2::<F>::zeros(batch.dim());
        for i in 0..batch.nrows() {
            for j in 0..batch.ncols() {
                centered_batch[[i, j]] = batch[[i, j]] - means[j];
            }
        }

        for k in 0..n_components {
            let component = components.column(k).to_owned();

            // Project each batch row onto the current component estimate.
            let mut projections = Array1::<F>::zeros(batch.nrows());
            for i in 0..batch.nrows() {
                let mut projection = F::zero();
                for j in 0..n_vars {
                    projection = projection + centered_batch[[i, j]] * component[j];
                }
                projections[i] = projection;
            }

            // One power-iteration step: new_component = Xᵀ · projections.
            let mut new_component = Array1::<F>::zeros(n_vars);
            for j in 0..n_vars {
                let mut sum = F::zero();
                for i in 0..batch.nrows() {
                    sum = sum + centered_batch[[i, j]] * projections[i];
                }
                new_component[j] = sum;
            }

            // Normalize the refined component.
            let mut norm = F::zero();
            for j in 0..n_vars {
                norm = norm + new_component[j] * new_component[j];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for j in 0..n_vars {
                    components[[j, k]] = new_component[j] / norm;
                }

                // Variance of the projections within this batch.
                let variance = projections
                    .iter()
                    .map(|&x| x * x)
                    .fold(F::zero(), |acc, x| acc + x);
                explained_variance[k] =
                    variance / F::from(batch.nrows()).expect("Operation failed");
            }
        }

        _n_samples_seen += batch.nrows();
    }

    // Final pass: project the full (implicitly centered) data set onto the
    // learned components.
    let mut transformeddata = Array2::<F>::zeros((n_obs, n_components));
    for i in 0..n_obs {
        for k in 0..n_components {
            let mut projection = F::zero();
            for j in 0..n_vars {
                let centered_val = data[[i, j]] - means[j];
                projection = projection + centered_val * components[[j, k]];
            }
            transformeddata[[i, k]] = projection;
        }
    }

    Ok(PCAResult {
        components,
        explained_variance,
        transformeddata,
        mean: means.clone(),
    })
}
1301
1302#[allow(dead_code)]
1303fn checkarray_finite_2d<F, D>(arr: &ArrayBase<D, Ix2>, name: &str) -> StatsResult<()>
1304where
1305 F: Float,
1306 D: Data<Elem = F>,
1307{
1308 for &val in arr.iter() {
1309 if !val.is_finite() {
1310 return Err(StatsError::InvalidArgument(format!(
1311 "{} contains non-finite values",
1312 name
1313 )));
1314 }
1315 }
1316 Ok(())
1317}
1318
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// The manager should hand out sane chunk sizes and track its history.
    #[test]
    fn test_adaptive_memory_manager() {
        let manager = AdaptiveMemoryManager::new(MemoryConstraints::default());

        let chunk = manager.get_optimal_chunksize(1000, 8);
        assert!(chunk > 0);
        assert!(chunk <= 1000);

        manager.record_operation(OperationMetrics {
            operation_type: "test".to_string(),
            memory_used: 1024,
            processing_time: std::time::Duration::from_millis(10),
            chunksize_used: chunk,
        });

        assert_eq!(manager.get_statistics().operations_completed, 1);
    }

    /// A 2×2 product at the base-case threshold matches the hand-computed
    /// result.
    #[test]
    fn test_cache_oblivious_matrix_mult() {
        let lhs = array![[1.0, 2.0], [3.0, 4.0]];
        let rhs = array![[5.0, 6.0], [7.0, 8.0]];

        let product =
            cache_oblivious_matrix_mult(&lhs.view(), &rhs.view(), 2).expect("Operation failed");

        let expected = [[19.0, 22.0], [43.0, 50.0]];
        for (r, row) in expected.iter().enumerate() {
            for (c, &want) in row.iter().enumerate() {
                assert!((product[[r, c]] - want).abs() < 1e-10);
            }
        }
    }
}