use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Data, Ix2};
use scirs2_core::numeric::{Float, NumCast, One, Zero};
use scirs2_core::simd_ops::SimdUnifiedOps;
use std::cmp::Ordering;
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};

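/// Tunable memory constraints that guide chunked and streaming computations.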
#[derive(Debug, Clone)]
pub struct MemoryConstraints {
    /// Maximum memory budget for an operation, in bytes.
    pub max_memory_bytes: usize,
    /// Preferred chunk size, in number of elements.
    pub chunksize: usize,
    /// Whether memory mapping may be used for very large inputs.
    pub use_memory_mapping: bool,
    /// Whether to emit hints that intermediate buffers can be released early.
    pub enable_gc_hints: bool,
}

impl Default for MemoryConstraints {
    fn default() -> Self {
        Self {
            max_memory_bytes: 1_024 * 1_024 * 1_024, // 1 GiB
            chunksize: 64 * 1024,                    // 64 Ki elements
            use_memory_mapping: true,
            enable_gc_hints: true,
        }
    }
}

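/// Adaptive memory manager that tracks current and peak usage plus a bounded
/// history of operations, and suggests chunk sizes that fit the remaining budget.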
pub struct AdaptiveMemoryManager {
    constraints: MemoryConstraints,
    current_usage: Arc<Mutex<usize>>,
    peak_usage: Arc<Mutex<usize>>,
    operation_history: Arc<Mutex<VecDeque<OperationMetrics>>>,
}

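/// Metrics recorded for a single completed operation.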
#[derive(Debug, Clone)]
pub struct OperationMetrics {
    operation_type: String,
    memory_used: usize,
    processing_time: std::time::Duration,
    chunksize_used: usize,
}

impl AdaptiveMemoryManager {
    /// Creates a manager with the given constraints and an empty operation history.
    pub fn new(constraints: MemoryConstraints) -> Self {
        Self {
            constraints,
            current_usage: Arc::new(Mutex::new(0)),
            peak_usage: Arc::new(Mutex::new(0)),
            operation_history: Arc::new(Mutex::new(VecDeque::with_capacity(100))),
        }
    }

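    /// Suggests a chunk size (in elements) that keeps a single chunk within
    /// roughly 80% of the memory still available under the configured limit.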
    pub fn get_optimal_chunksize(&self, datasize: usize, elementsize: usize) -> usize {
        let current_usage = *self.current_usage.lock().expect("Operation failed");
        let available_memory = self
            .constraints
            .max_memory_bytes
            .saturating_sub(current_usage);

        // Budget at most 80% of the remaining memory for a single chunk.
        let max_chunk_memory = available_memory * 4 / 5;
        let max_chunk_elements = max_chunk_memory / elementsize;

        let optimalsize = max_chunk_elements
            .min(datasize)
            .min(self.constraints.chunksize);
        // Round to a power of two for alignment; never return 0, which would
        // break callers that use the result with `step_by`.
        (optimalsize.next_power_of_two() / 2).max(1)
    }

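    /// Records the metrics of a completed operation, keeping a bounded history
    /// and updating the observed peak memory usage.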
    pub fn record_operation(&self, metrics: OperationMetrics) {
        let mut history = self.operation_history.lock().expect("Operation failed");

        // Keep only the 100 most recent operations.
        if history.len() >= 100 {
            history.pop_front();
        }

        history.push_back(metrics.clone());

        let mut peak = self.peak_usage.lock().expect("Operation failed");
        *peak = (*peak).max(metrics.memory_used);
    }

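    /// Summarizes current, peak, and average memory usage over the recorded history.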
    pub fn get_statistics(&self) -> MemoryStatistics {
        let current_usage = *self.current_usage.lock().expect("Operation failed");
        let peak_usage = *self.peak_usage.lock().expect("Operation failed");
        let history = self.operation_history.lock().expect("Operation failed");

        let avg_memory_per_op = if !history.is_empty() {
            history.iter().map(|m| m.memory_used).sum::<usize>() / history.len()
        } else {
            0
        };

        MemoryStatistics {
            current_usage,
            peak_usage,
            avg_memory_per_operation: avg_memory_per_op,
            operations_completed: history.len(),
            memory_efficiency: if peak_usage > 0 {
                (avg_memory_per_op as f64 / peak_usage as f64) * 100.0
            } else {
                100.0
            },
        }
    }
}

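/// Summary of memory usage across the operations recorded by `AdaptiveMemoryManager`.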
#[derive(Debug, Clone)]
pub struct MemoryStatistics {
    pub current_usage: usize,
    pub peak_usage: usize,
    pub avg_memory_per_operation: usize,
    pub operations_completed: usize,
    pub memory_efficiency: f64,
}

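/// Computes a correlation matrix under the manager's memory budget, falling back
/// to a blocked computation when the estimated memory exceeds the limit.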
#[allow(dead_code)]
pub fn corrcoef_memory_aware<F>(
    data: &ArrayView2<F>,
    method: &str,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + Copy
        + Send
        + Sync
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    checkarray_finite_2d(data, "data")?;

    let (n_obs, n_vars) = data.dim();
    let elementsize = std::mem::size_of::<F>();

    // Estimate the memory needed for the result matrix plus two column-length temporaries.
    let matrix_memory = n_vars * n_vars * elementsize;
    let temp_memory = n_obs * elementsize * 2;
    let total_estimated = matrix_memory + temp_memory;

    let corr_matrix = if total_estimated <= manager.constraints.max_memory_bytes {
        // Everything fits: compute the correlation matrix in one pass.
        compute_correlation_matrix_standard(data, method)?
    } else {
        // Memory-constrained: process the variables block by block.
        let blocksize = manager.get_optimal_chunksize(n_vars, elementsize * n_vars);
        compute_correlation_matrix_blocked(data, method, blocksize)?
    };

    let metrics = OperationMetrics {
        operation_type: format!("corrcoef_memory_aware_{}", method),
        memory_used: total_estimated,
        processing_time: start_time.elapsed(),
        chunksize_used: n_vars,
    };
    manager.record_operation(metrics);

    Ok(corr_matrix)
}

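/// Cache-oblivious style matrix multiplication: recursively splits the largest
/// dimension until the sub-problem fits within `threshold`, then multiplies with
/// a simple triple loop.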
#[allow(dead_code)]
pub fn cache_oblivious_matrix_mult<F>(
    a: &ArrayView2<F>,
    b: &ArrayView2<F>,
    threshold: usize,
) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Display,
{
    let (m, n) = a.dim();
    let (n2, p) = b.dim();

    if n != n2 {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions don't match for multiplication".to_string(),
        ));
    }

    // A threshold of 0 would never reach the base case.
    let threshold = threshold.max(1);
    let mut result = Array2::<F>::zeros((m, p));

    if m <= threshold && n <= threshold && p <= threshold {
        // Base case: small enough for a straightforward triple loop.
        for i in 0..m {
            for j in 0..p {
                let mut sum = F::zero();
                for k in 0..n {
                    sum = sum + a[[i, k]] * b[[k, j]];
                }
                result[[i, j]] = sum;
            }
        }
    } else if m >= n && m >= p {
        // Split along the rows of `a` and recurse on each half.
        let mid_m = m / 2;
        let top =
            cache_oblivious_matrix_mult(&a.slice(scirs2_core::ndarray::s![..mid_m, ..]), b, threshold)?;
        let bottom =
            cache_oblivious_matrix_mult(&a.slice(scirs2_core::ndarray::s![mid_m.., ..]), b, threshold)?;
        result
            .slice_mut(scirs2_core::ndarray::s![..mid_m, ..])
            .assign(&top);
        result
            .slice_mut(scirs2_core::ndarray::s![mid_m.., ..])
            .assign(&bottom);
    } else if p >= n {
        // Split along the columns of `b`.
        let mid_p = p / 2;
        let left =
            cache_oblivious_matrix_mult(a, &b.slice(scirs2_core::ndarray::s![.., ..mid_p]), threshold)?;
        let right =
            cache_oblivious_matrix_mult(a, &b.slice(scirs2_core::ndarray::s![.., mid_p..]), threshold)?;
        result
            .slice_mut(scirs2_core::ndarray::s![.., ..mid_p])
            .assign(&left);
        result
            .slice_mut(scirs2_core::ndarray::s![.., mid_p..])
            .assign(&right);
    } else {
        // Split along the shared dimension and add the two partial products.
        let mid_n = n / 2;
        let c1 = cache_oblivious_matrix_mult(
            &a.slice(scirs2_core::ndarray::s![.., ..mid_n]),
            &b.slice(scirs2_core::ndarray::s![..mid_n, ..]),
            threshold,
        )?;
        let c2 = cache_oblivious_matrix_mult(
            &a.slice(scirs2_core::ndarray::s![.., mid_n..]),
            &b.slice(scirs2_core::ndarray::s![mid_n.., ..]),
            threshold,
        )?;
        for i in 0..m {
            for j in 0..p {
                result[[i, j]] = c1[[i, j]] + c2[[i, j]];
            }
        }
    }

    Ok(result)
}

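/// Accumulates sums and cross-products over data chunks to compute a covariance
/// matrix without materializing the full dataset (population covariance, divides by n).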
#[allow(dead_code)]
pub fn streaming_covariance_matrix<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut n_vars = 0;
    let mut total_obs = 0;
    let mut sum_x = Array1::<F>::zeros(0);
    let mut sum_x2 = Array2::<F>::zeros((0, 0));
    let mut initialized = false;

    for chunk in data_chunks {
        let (chunk_obs, chunk_vars) = chunk.dim();

        if !initialized {
            n_vars = chunk_vars;
            sum_x = Array1::<F>::zeros(n_vars);
            sum_x2 = Array2::<F>::zeros((n_vars, n_vars));
            initialized = true;
        } else if chunk_vars != n_vars {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have the same number of variables".to_string(),
            ));
        }

        total_obs += chunk_obs;

        for i in 0..chunk_obs {
            let row = chunk.row(i);

            // Accumulate per-variable sums.
            for j in 0..n_vars {
                sum_x[j] = sum_x[j] + row[j];
            }

            // Accumulate the upper triangle of the cross-product matrix and mirror it.
            for j in 0..n_vars {
                for k in j..n_vars {
                    let product = row[j] * row[k];
                    sum_x2[[j, k]] = sum_x2[[j, k]] + product;
                    if j != k {
                        sum_x2[[k, j]] = sum_x2[[k, j]] + product;
                    }
                }
            }
        }
    }

    if total_obs == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // cov(X_i, X_j) = E[X_i X_j] - E[X_i] E[X_j]
    let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
    let n_f = F::from(total_obs).expect("Failed to convert to float");

    for i in 0..n_vars {
        for j in 0..n_vars {
            let mean_i = sum_x[i] / n_f;
            let mean_j = sum_x[j] / n_f;
            let cov = (sum_x2[[i, j]] / n_f) - (mean_i * mean_j);
            cov_matrix[[i, j]] = cov;
        }
    }

    let memory_used = (n_vars * n_vars + n_vars) * std::mem::size_of::<F>();
    let metrics = OperationMetrics {
        operation_type: "streaming_covariance_matrix".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: n_vars,
    };
    manager.record_operation(metrics);

    Ok(cov_matrix)
}

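/// Runs PCA, choosing between an in-memory computation and an incremental pass
/// depending on whether the centered data fits within half the memory budget.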
#[allow(dead_code)]
pub fn pca_memory_efficient<F>(
    data: &ArrayView2<F>,
    n_components: Option<usize>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let (n_obs, n_vars) = data.dim();
    let n_components = n_components.unwrap_or(n_vars.min(n_obs));

    // Column means, needed to center the data.
    let mut means = Array1::<F>::zeros(n_vars);
    for i in 0..n_vars {
        let column = data.column(i);
        means[i] = column.iter().fold(F::zero(), |acc, &x| acc + x)
            / F::from(n_obs).expect("Failed to convert to float");
    }

    let centereddata_memory = n_obs * n_vars * std::mem::size_of::<F>();

    if centereddata_memory <= manager.constraints.max_memory_bytes / 2 {
        // Enough memory to hold a centered copy of the data.
        let mut centereddata = Array2::<F>::zeros((n_obs, n_vars));
        for i in 0..n_obs {
            for j in 0..n_vars {
                centereddata[[i, j]] = data[[i, j]] - means[j];
            }
        }

        let cov_matrix = compute_covariance_from_centered(&centereddata.view())?;

        let (eigenvectors, eigenvalues) =
            compute_eigendecomposition(&cov_matrix.view(), n_components)?;

        let transformed = matrix_multiply(&centereddata.view(), &eigenvectors.view())?;

        let result = PCAResult {
            components: eigenvectors,
            explained_variance: eigenvalues,
            transformeddata: transformed,
            mean: means,
        };

        let metrics = OperationMetrics {
            operation_type: "pca_memory_efficient".to_string(),
            memory_used: centereddata_memory,
            processing_time: start_time.elapsed(),
            chunksize_used: n_vars,
        };
        manager.record_operation(metrics);

        Ok(result)
    } else {
        // Too large to center in memory: fall back to incremental PCA.
        incremental_pca(data, n_components, &means, manager)
    }
}

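/// Result of a PCA computation: components, their explained variance, the
/// projected data, and the per-variable means used for centering.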
#[derive(Debug, Clone)]
pub struct PCAResult<F> {
    pub components: Array2<F>,
    pub explained_variance: Array1<F>,
    pub transformeddata: Array2<F>,
    pub mean: Array1<F>,
}

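/// Streaming PCA over data chunks: maintains a running mean and an approximate
/// running covariance, then eigendecomposes the covariance at the end. The
/// transformed data is not materialized and is returned empty.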
#[allow(dead_code)]
pub fn streaming_pca_enhanced<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
    n_components: usize,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut n_samples_ = 0;
    let mut n_features = 0;
    let mut running_mean = Array1::<F>::zeros(0);
    let mut running_cov = Array2::<F>::zeros((0, 0));
    let mut initialized = false;

    for chunk in data_chunks {
        let (chunk_samples, chunk_features) = chunk.dim();

        if !initialized {
            n_features = chunk_features;
            running_mean = Array1::<F>::zeros(n_features);
            running_cov = Array2::<F>::zeros((n_features, n_features));
            initialized = true;
        } else if chunk_features != n_features {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have the same number of features".to_string(),
            ));
        }

        for i in 0..chunk_samples {
            n_samples_ += 1;
            let row = chunk.row(i);

            // Incremental update of the running mean.
            let n_f = F::from(n_samples_).expect("Failed to convert to float");
            for j in 0..n_features {
                let delta = row[j] - running_mean[j];
                running_mean[j] = running_mean[j] + delta / n_f;
            }

            // Incremental update of the running covariance (upper triangle, mirrored).
            if n_samples_ > 1 {
                for j in 0..n_features {
                    for k in j..n_features {
                        let prod = (row[j] - running_mean[j]) * (row[k] - running_mean[k]);
                        running_cov[[j, k]] = running_cov[[j, k]]
                            * F::from(n_samples_ - 1).expect("Failed to convert to float")
                            / n_f
                            + prod / n_f;
                        if j != k {
                            running_cov[[k, j]] = running_cov[[j, k]];
                        }
                    }
                }
            }
        }
    }

    if n_samples_ == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    let (components, explained_variance) =
        compute_eigendecomposition(&running_cov.view(), n_components)?;

    let memory_used =
        n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_pca_enhanced".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(PCAResult {
        components,
        explained_variance,
        // Transformed data is not computed in streaming mode.
        transformeddata: Array2::zeros((0, 0)),
        mean: running_mean,
    })
}

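/// Builds an adaptive histogram over streamed data: values are reservoir-sampled to
/// bound memory, the number of bins is chosen with the Freedman-Diaconis rule, and
/// the returned counts are taken over the sampled values.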
#[allow(dead_code)]
pub fn streaming_histogram_adaptive<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<(Array1<F>, Array1<usize>)>
where
    F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut min_val = F::infinity();
    let mut max_val = F::neg_infinity();
    let mut total_count = 0;
    // Reservoir of at most `chunksize` sampled values.
    let mut values = Vec::new();

    for chunk in data_chunks {
        for &value in chunk.iter() {
            if value < min_val {
                min_val = value;
            }
            if value > max_val {
                max_val = value;
            }
            total_count += 1;

            if values.len() < manager.constraints.chunksize {
                values.push(value);
            } else {
                // Reservoir sampling: replace an existing sample with decreasing probability.
                let j = {
                    use scirs2_core::random::Rng;
                    let mut rng = scirs2_core::random::thread_rng();
                    rng.random_range(0..total_count)
                };
                if j < values.len() {
                    values[j] = value;
                }
            }
        }
    }

    if total_count == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Choose the bin width with the Freedman-Diaconis rule on the sampled values.
    values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
    let q75_idx = (values.len() as f64 * 0.75) as usize;
    let q25_idx = (values.len() as f64 * 0.25) as usize;
    let iqr = values[q75_idx] - values[q25_idx];
    let h = F::from(2.0).expect("Failed to convert constant to float") * iqr
        / F::from(total_count as f64)
            .expect("Operation failed")
            .powf(F::from(1.0 / 3.0).expect("Failed to convert to float"));
    let n_bins = if h > F::zero() {
        ((max_val - min_val) / h)
            .to_usize()
            .unwrap_or(50)
            .max(10)
            .min(1000)
    } else {
        // Degenerate spread (e.g. all values equal): fall back to a fixed bin count.
        50
    };

    let bin_width = (max_val - min_val) / F::from(n_bins).expect("Failed to convert to float");
    let mut bin_edges = Array1::<F>::zeros(n_bins + 1);
    let mut counts = Array1::<usize>::zeros(n_bins);

    for i in 0..=n_bins {
        bin_edges[i] = min_val + F::from(i).expect("Failed to convert to float") * bin_width;
    }

    // Count the sampled values into bins.
    for &value in values.iter() {
        if value >= min_val && value <= max_val {
            let bin_idx = ((value - min_val) / bin_width)
                .to_usize()
                .unwrap_or(0)
                .min(n_bins - 1);
            counts[bin_idx] += 1;
        }
    }

    let memory_used = n_bins * (std::mem::size_of::<F>() + std::mem::size_of::<usize>());
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_histogram_adaptive".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok((bin_edges, counts))
}

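/// Estimates the requested quantiles over streamed data with the P-squared (P²)
/// algorithm, using constant memory per quantile.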
#[allow(dead_code)]
pub fn streaming_quantiles_p2<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
    quantiles: &[f64],
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut p2_estimators: Vec<P2Estimator<F>> =
        quantiles.iter().map(|&q| P2Estimator::new(q)).collect();

    let mut total_count = 0;

    for chunk in data_chunks {
        for &value in chunk.iter() {
            total_count += 1;
            for estimator in &mut p2_estimators {
                estimator.update(value);
            }
        }
    }

    if total_count == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    let results: Vec<F> = p2_estimators.iter().map(|est| est.quantile()).collect();

    let memory_used = quantiles.len() * std::mem::size_of::<P2Estimator<F>>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_quantiles_p2".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(Array1::from_vec(results))
}

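/// Single-quantile estimator implementing the P² algorithm of Jain and Chlamtac:
/// five markers track the minimum, the target quantile, two intermediate quantiles,
/// and the maximum, and are adjusted by parabolic interpolation as values stream in.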
#[derive(Debug, Clone)]
struct P2Estimator<F> {
    quantile: f64,
    markers: [F; 5],
    positions: [f64; 5],
    desired_positions: [f64; 5],
    increment: [f64; 5],
    count: usize,
}

impl<F> P2Estimator<F>
where
    F: Float + NumCast + Copy + PartialOrd + std::fmt::Display,
{
    fn new(quantile: f64) -> Self {
        Self {
            quantile,
            markers: [F::zero(); 5],
            positions: [1.0, 2.0, 3.0, 4.0, 5.0],
            desired_positions: [
                1.0,
                1.0 + 2.0 * quantile,
                1.0 + 4.0 * quantile,
                3.0 + 2.0 * quantile,
                5.0,
            ],
            increment: [0.0, quantile / 2.0, quantile, (1.0 + quantile) / 2.0, 1.0],
            count: 0,
        }
    }

    fn update(&mut self, value: F) {
        self.count += 1;

        // The first five observations initialize the markers directly.
        if self.count <= 5 {
            self.markers[self.count - 1] = value;
            if self.count == 5 {
                self.markers
                    .sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            }
            return;
        }

        // Find which marker interval the new value falls into.
        let mut k = 0;
        for i in 0..4 {
            if value >= self.markers[i] {
                k = i + 1;
            }
        }

        if k == 0 {
            // New minimum.
            self.markers[0] = value;
            k = 1;
        } else if k == 5 {
            // New maximum.
            self.markers[4] = value;
            k = 4;
        }

        // Increment the positions of the markers above the new observation.
        for i in k..5 {
            self.positions[i] += 1.0;
        }

        for i in 0..5 {
            self.desired_positions[i] += self.increment[i];
        }

        // Adjust the three interior markers if they drifted from their desired positions.
        for i in 1..4 {
            let d = self.desired_positions[i] - self.positions[i];
            if (d >= 1.0 && self.positions[i + 1] - self.positions[i] > 1.0)
                || (d <= -1.0 && self.positions[i - 1] - self.positions[i] < -1.0)
            {
                let d_sign = if d >= 0.0 { 1.0 } else { -1.0 };
                let new_height = self.parabolic_prediction(i, d_sign);

                // Use the parabolic estimate only if it keeps the markers ordered.
                if self.markers[i - 1] < new_height && new_height < self.markers[i + 1] {
                    self.markers[i] = new_height;
                } else {
                    self.markers[i] = self.linear_prediction(i, d_sign);
                }
                self.positions[i] += d_sign;
            }
        }
    }

    /// Piecewise-parabolic (P²) prediction of the marker height after moving it by `d`.
    fn parabolic_prediction(&self, i: usize, d: f64) -> F {
        let qi = self.markers[i];
        let qi_prev = self.markers[i - 1];
        let qi_next = self.markers[i + 1];
        let ni = self.positions[i];
        let ni_prev = self.positions[i - 1];
        let ni_next = self.positions[i + 1];

        let d_f = F::from(d).expect("Failed to convert to float");
        let ni_f = F::from(ni).expect("Failed to convert to float");
        let ni_prev_f = F::from(ni_prev).expect("Failed to convert to float");
        let ni_next_f = F::from(ni_next).expect("Failed to convert to float");

        let a = d_f / (ni_next_f - ni_prev_f);
        let b1 = (ni_f - ni_prev_f + d_f) * (qi_next - qi) / (ni_next_f - ni_f);
        let b2 = (ni_next_f - ni_f - d_f) * (qi - qi_prev) / (ni_f - ni_prev_f);

        qi + a * (b1 + b2)
    }

    /// Linear fallback prediction used when the parabolic estimate would break ordering.
    fn linear_prediction(&self, i: usize, d: f64) -> F {
        if d > 0.0 {
            let qi_next = self.markers[i + 1];
            let qi = self.markers[i];
            let ni_next = self.positions[i + 1];
            let ni = self.positions[i];
            qi + (qi_next - qi) * F::from(d / (ni_next - ni)).expect("Operation failed")
        } else {
            let qi = self.markers[i];
            let qi_prev = self.markers[i - 1];
            let ni = self.positions[i];
            let ni_prev = self.positions[i - 1];
            qi + (qi_prev - qi) * F::from(-d / (ni - ni_prev)).expect("Operation failed")
        }
    }

    /// Current quantile estimate: the middle marker once at least five values have
    /// been seen, otherwise the appropriate element of the sorted initial values.
    fn quantile(&self) -> F {
        if self.count < 5 {
            let mut sorted = self.markers[..self.count.min(5)].to_vec();
            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            let idx = (self.quantile * (sorted.len() - 1) as f64) as usize;
            sorted[idx]
        } else {
            self.markers[2]
        }
    }
}

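/// Streaming ridge regression: accumulates X'X and X'y over chunks, adds the
/// regularization term to the diagonal, and solves the normal equations.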
#[allow(dead_code)]
pub fn streaming_regression_enhanced<'a, F>(
    data_chunks: impl Iterator<Item = (ArrayView2<'a, F>, ArrayView1<'a, F>)>,
    regularization: F,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut xtx = Array2::<F>::zeros((0, 0));
    let mut xty = Array1::<F>::zeros(0);
    let mut n_samples_ = 0;
    let mut n_features = 0;
    let mut initialized = false;

    for (x_chunk, y_chunk) in data_chunks {
        let (chunk_samples, chunk_features) = x_chunk.dim();

        if !initialized {
            n_features = chunk_features;
            xtx = Array2::<F>::zeros((n_features, n_features));
            xty = Array1::<F>::zeros(n_features);
            initialized = true;
        } else if chunk_features != n_features {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have the same number of features".to_string(),
            ));
        }

        if y_chunk.len() != chunk_samples {
            return Err(StatsError::DimensionMismatch(
                "X and y must have same number of samples".to_string(),
            ));
        }

        n_samples_ += chunk_samples;

        // Accumulate X'X (upper triangle, mirrored).
        for i in 0..n_features {
            for j in i..n_features {
                let mut sum = F::zero();
                for k in 0..chunk_samples {
                    sum = sum + x_chunk[[k, i]] * x_chunk[[k, j]];
                }
                xtx[[i, j]] = xtx[[i, j]] + sum;
                if i != j {
                    xtx[[j, i]] = xtx[[i, j]];
                }
            }
        }

        // Accumulate X'y.
        for i in 0..n_features {
            let mut sum = F::zero();
            for k in 0..chunk_samples {
                sum = sum + x_chunk[[k, i]] * y_chunk[k];
            }
            xty[i] = xty[i] + sum;
        }
    }

    if n_samples_ == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Ridge regularization: add lambda to the diagonal of X'X.
    for i in 0..n_features {
        xtx[[i, i]] = xtx[[i, i]] + regularization;
    }

    // Solve (X'X + lambda * I) beta = X'y.
    let coefficients = solve_linear_system(&xtx.view(), &xty.view())?;

    let memory_used =
        n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_regression_enhanced".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(coefficients)
}

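/// Solves the dense linear system `a * x = b` by Gauss-Jordan elimination with
/// partial pivoting; errors if the matrix is numerically singular.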
#[allow(dead_code)]
fn solve_linear_system<F>(a: &ArrayView2<F>, b: &ArrayView1<F>) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
{
    let n = a.nrows();
    if a.ncols() != n || b.len() != n {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions incompatible".to_string(),
        ));
    }

    // Build the augmented matrix [a | b].
    let mut aug = Array2::<F>::zeros((n, n + 1));
    for i in 0..n {
        for j in 0..n {
            aug[[i, j]] = a[[i, j]];
        }
        aug[[i, n]] = b[i];
    }

    for i in 0..n {
        // Partial pivoting: pick the row with the largest absolute value in column i.
        let mut max_row = i;
        for k in (i + 1)..n {
            if aug[[k, i]].abs() > aug[[max_row, i]].abs() {
                max_row = k;
            }
        }

        if max_row != i {
            for j in 0..=n {
                let temp = aug[[i, j]];
                aug[[i, j]] = aug[[max_row, j]];
                aug[[max_row, j]] = temp;
            }
        }

        let pivot = aug[[i, i]];
        if pivot.abs() < F::from(1e-12).expect("Failed to convert constant to float") {
            return Err(StatsError::ComputationError(
                "Matrix is singular".to_string(),
            ));
        }

        // Normalize the pivot row.
        for j in i..=n {
            aug[[i, j]] = aug[[i, j]] / pivot;
        }

        // Eliminate column i from all other rows.
        for k in 0..n {
            if k != i {
                let factor = aug[[k, i]];
                for j in i..=n {
                    aug[[k, j]] = aug[[k, j]] - factor * aug[[i, j]];
                }
            }
        }
    }

    // After Gauss-Jordan elimination, the last column holds the solution.
    let mut solution = Array1::<F>::zeros(n);
    for i in 0..n {
        solution[i] = aug[[i, n]];
    }

    Ok(solution)
}

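/// Delegates to `crate::corrcoef` to compute the full correlation matrix in one pass.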
#[allow(dead_code)]
fn compute_correlation_matrix_standard<F>(
    data: &ArrayView2<F>,
    method: &str,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + Zero
        + One
        + Copy
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display
        + Send
        + Sync
        + scirs2_core::simd_ops::SimdUnifiedOps,
{
    crate::corrcoef(data, method)
}

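/// Computes the correlation matrix in blocks of `blocksize` variables at a time,
/// filling the upper triangle pairwise and mirroring it.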
#[allow(dead_code)]
fn compute_correlation_matrix_blocked<F>(
    data: &ArrayView2<F>,
    method: &str,
    blocksize: usize,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + Zero
        + One
        + Copy
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display
        + scirs2_core::simd_ops::SimdUnifiedOps,
{
    let (_, n_vars) = data.dim();
    let mut corr_matrix = Array2::<F>::zeros((n_vars, n_vars));

    // A variable is perfectly correlated with itself.
    for i in 0..n_vars {
        corr_matrix[[i, i]] = F::one();
    }

    // Walk the upper triangle in blocks and mirror each entry.
    for i_block in (0..n_vars).step_by(blocksize) {
        let i_end = (i_block + blocksize).min(n_vars);

        for j_block in (i_block..n_vars).step_by(blocksize) {
            let j_end = (j_block + blocksize).min(n_vars);

            for i in i_block..i_end {
                for j in j_block.max(i + 1)..j_end {
                    let col_i = data.column(i);
                    let col_j = data.column(j);

                    let corr = match method {
                        "pearson" => crate::pearson_r(&col_i, &col_j)?,
                        "spearman" => crate::spearman_r(&col_i, &col_j)?,
                        "kendall" => crate::kendall_tau(&col_i, &col_j, "b")?,
                        _ => {
                            return Err(StatsError::InvalidArgument(format!(
                                "Unknown method: {}",
                                method
                            )))
                        }
                    };

                    corr_matrix[[i, j]] = corr;
                    corr_matrix[[j, i]] = corr;
                }
            }
        }
    }

    Ok(corr_matrix)
}

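/// Computes the sample covariance matrix (dividing by n - 1) from already-centered data.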
#[allow(dead_code)]
fn compute_covariance_from_centered<F>(data: &ArrayView2<F>) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + Copy + std::fmt::Display,
{
    let (n_obs, n_vars) = data.dim();
    let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
    // Sample covariance: divide by n - 1.
    let n_f = F::from(n_obs - 1).expect("Failed to convert to float");

    for i in 0..n_vars {
        for j in i..n_vars {
            let mut cov = F::zero();
            for k in 0..n_obs {
                cov = cov + data[[k, i]] * data[[k, j]];
            }
            cov = cov / n_f;
            cov_matrix[[i, j]] = cov;
            cov_matrix[[j, i]] = cov;
        }
    }

    Ok(cov_matrix)
}

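/// Approximates the top `n_components` eigenpairs of a symmetric matrix with power
/// iteration plus Gram-Schmidt deflation against previously found eigenvectors.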
#[allow(dead_code)]
fn compute_eigendecomposition<F>(
    matrix: &ArrayView2<F>,
    n_components: usize,
) -> StatsResult<(Array2<F>, Array1<F>)>
where
    F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
{
    let n = matrix.dim().0;
    let n_components = n_components.min(n);

    let mut eigenvalues = Array1::<F>::zeros(n_components);
    let mut eigenvectors = Array2::<F>::zeros((n, n_components));

    for k in 0..n_components {
        // Power iteration starting from the all-ones vector.
        let mut v = Array1::<F>::ones(n);

        for _ in 0..100 {
            // new_v = matrix * v
            let mut new_v = Array1::<F>::zeros(n);
            for i in 0..n {
                let mut sum = F::zero();
                for j in 0..n {
                    sum = sum + matrix[[i, j]] * v[j];
                }
                new_v[i] = sum;
            }

            // Deflation: project out the eigenvectors already found.
            for prev_k in 0..k {
                let mut dot_product = F::zero();
                for i in 0..n {
                    dot_product = dot_product + new_v[i] * eigenvectors[[i, prev_k]];
                }

                for i in 0..n {
                    new_v[i] = new_v[i] - dot_product * eigenvectors[[i, prev_k]];
                }
            }

            // Normalize the iterate.
            let mut norm = F::zero();
            for i in 0..n {
                norm = norm + new_v[i] * new_v[i];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for i in 0..n {
                    new_v[i] = new_v[i] / norm;
                }
            }

            // Check for convergence.
            let mut converged = true;
            for i in 0..n {
                if (new_v[i] - v[i]).abs()
                    > F::from(1e-6).expect("Failed to convert constant to float")
                {
                    converged = false;
                    break;
                }
            }

            v = new_v;

            if converged {
                break;
            }
        }

        // Rayleigh quotient v' * matrix * v gives the eigenvalue estimate.
        let mut eigenvalue = F::zero();
        for i in 0..n {
            let mut sum = F::zero();
            for j in 0..n {
                sum = sum + matrix[[i, j]] * v[j];
            }
            eigenvalue = eigenvalue + v[i] * sum;
        }

        eigenvalues[k] = eigenvalue;
        for i in 0..n {
            eigenvectors[[i, k]] = v[i];
        }
    }

    Ok((eigenvectors, eigenvalues))
}

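/// Naive triple-loop dense matrix multiplication.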
#[allow(dead_code)]
fn matrix_multiply<F>(a: &ArrayView2<F>, b: &ArrayView2<F>) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + Copy + std::fmt::Display,
{
    let (m, n) = a.dim();
    let (n2, p) = b.dim();

    if n != n2 {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions don't match".to_string(),
        ));
    }

    let mut result = Array2::<F>::zeros((m, p));

    for i in 0..m {
        for j in 0..p {
            let mut sum = F::zero();
            for k in 0..n {
                sum = sum + a[[i, k]] * b[[k, j]];
            }
            result[[i, j]] = sum;
        }
    }

    Ok(result)
}

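/// Incremental PCA fallback: refines each component batch by batch (one
/// projection/update pass per batch) so the centered data never has to be held
/// in memory all at once.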
#[allow(dead_code)]
fn incremental_pca<F>(
    data: &ArrayView2<F>,
    n_components: usize,
    means: &Array1<F>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
{
    let (n_obs, n_vars) = data.dim();
    let n_components = n_components.min(n_vars);

    let batchsize = manager.get_optimal_chunksize(n_obs, std::mem::size_of::<F>() * n_vars);

    // Initialize the components to distinct unit basis vectors.
    let mut components = Array2::<F>::zeros((n_vars, n_components));
    for i in 0..n_components {
        components[[i % n_vars, i]] = F::one();
    }

    let mut explained_variance = Array1::<F>::zeros(n_components);
    let mut _n_samples_seen = 0;

    for batch_start in (0..n_obs).step_by(batchsize) {
        let batch_end = (batch_start + batchsize).min(n_obs);
        let batch = data.slice(scirs2_core::ndarray::s![batch_start..batch_end, ..]);

        // Center the batch using the precomputed means.
        let mut centered_batch = Array2::<F>::zeros(batch.dim());
        for i in 0..batch.nrows() {
            for j in 0..batch.ncols() {
                centered_batch[[i, j]] = batch[[i, j]] - means[j];
            }
        }

        for k in 0..n_components {
            let component = components.column(k).to_owned();

            // Project the centered batch onto the current component.
            let mut projections = Array1::<F>::zeros(batch.nrows());
            for i in 0..batch.nrows() {
                let mut projection = F::zero();
                for j in 0..n_vars {
                    projection = projection + centered_batch[[i, j]] * component[j];
                }
                projections[i] = projection;
            }

            // Update the component as the projection-weighted sum of the batch rows.
            let mut new_component = Array1::<F>::zeros(n_vars);
            for j in 0..n_vars {
                let mut sum = F::zero();
                for i in 0..batch.nrows() {
                    sum = sum + centered_batch[[i, j]] * projections[i];
                }
                new_component[j] = sum;
            }

            // Normalize the updated component.
            let mut norm = F::zero();
            for j in 0..n_vars {
                norm = norm + new_component[j] * new_component[j];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for j in 0..n_vars {
                    components[[j, k]] = new_component[j] / norm;
                }

                // Explained variance estimated from the projections of this batch.
                let variance = projections
                    .iter()
                    .map(|&x| x * x)
                    .fold(F::zero(), |acc, x| acc + x);
                explained_variance[k] =
                    variance / F::from(batch.nrows()).expect("Operation failed");
            }
        }

        _n_samples_seen += batch.nrows();
    }

    // Project the full dataset onto the final components.
    let mut transformeddata = Array2::<F>::zeros((n_obs, n_components));
    for i in 0..n_obs {
        for k in 0..n_components {
            let mut projection = F::zero();
            for j in 0..n_vars {
                let centered_val = data[[i, j]] - means[j];
                projection = projection + centered_val * components[[j, k]];
            }
            transformeddata[[i, k]] = projection;
        }
    }

    Ok(PCAResult {
        components,
        explained_variance,
        transformeddata,
        mean: means.clone(),
    })
}

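/// Returns an error if any element of the 2-D array is NaN or infinite.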
#[allow(dead_code)]
fn checkarray_finite_2d<F, D>(arr: &ArrayBase<D, Ix2>, name: &str) -> StatsResult<()>
where
    F: Float,
    D: Data<Elem = F>,
{
    for &val in arr.iter() {
        if !val.is_finite() {
            return Err(StatsError::InvalidArgument(format!(
                "{} contains non-finite values",
                name
            )));
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_adaptive_memory_manager() {
        let constraints = MemoryConstraints::default();
        let manager = AdaptiveMemoryManager::new(constraints);

        let chunksize = manager.get_optimal_chunksize(1000, 8);
        assert!(chunksize > 0);
        assert!(chunksize <= 1000);

        let metrics = OperationMetrics {
            operation_type: "test".to_string(),
            memory_used: 1024,
            processing_time: std::time::Duration::from_millis(10),
            chunksize_used: chunksize,
        };

        manager.record_operation(metrics);
        let stats = manager.get_statistics();
        assert_eq!(stats.operations_completed, 1);
    }

    #[test]
    fn test_cache_oblivious_matrix_mult() {
        let a = array![[1.0, 2.0], [3.0, 4.0]];
        let b = array![[5.0, 6.0], [7.0, 8.0]];

        let result =
            cache_oblivious_matrix_mult(&a.view(), &b.view(), 2).expect("Operation failed");

        assert!((result[[0, 0]] - 19.0).abs() < 1e-10);
        assert!((result[[0, 1]] - 22.0).abs() < 1e-10);
        assert!((result[[1, 0]] - 43.0).abs() < 1e-10);
        assert!((result[[1, 1]] - 50.0).abs() < 1e-10);
    }
}