1use scirs2_core::ndarray::{par_azip, Array1, Array2, ArrayView2, Axis};
7use scirs2_core::parallel_ops::*;
8use scirs2_core::random::Rng;
9use scirs2_core::validation::{check_not_empty, check_positive};
10
11use crate::error::{Result, TransformError};
12use crate::utils::{DataChunker, PerfUtils, ProcessingStrategy, StatUtils};
13use statrs::statistics::Statistics;
14
/// Standard scaler with an adaptive processing strategy
/// (standard / SIMD / parallel / out-of-core), selected at fit time.
///
/// Centers each feature and scales it; in robust mode the per-feature
/// median and MAD are used instead of mean and standard deviation.
pub struct EnhancedStandardScaler {
    // Per-feature centers (means, or medians in robust mode); None until fitted.
    means: Option<Array1<f64>>,
    // Per-feature scales (std devs, or scaled MADs in robust mode); None until fitted.
    stds: Option<Array1<f64>>,
    // When true, use median/MAD statistics that resist outliers.
    robust: bool,
    // Strategy chosen during `fit` from the data shape and memory budget.
    strategy: ProcessingStrategy,
    // Memory budget in megabytes driving strategy and chunk-size choices.
    memory_limitmb: usize,
}
28
29impl EnhancedStandardScaler {
30 pub fn new(robust: bool, memory_limitmb: usize) -> Self {
32 EnhancedStandardScaler {
33 means: None,
34 stds: None,
35 robust,
36 strategy: ProcessingStrategy::Standard,
37 memory_limitmb,
38 }
39 }
40
    /// Computes per-feature center/scale statistics from `x`.
    ///
    /// Validates that `x` is non-empty and fully finite, then selects a
    /// processing strategy from the data shape and memory budget and dispatches
    /// to the matching fitting routine.
    ///
    /// # Errors
    /// `DataValidationError` for non-finite input; propagates errors from the
    /// underlying fitting routines.
    pub fn fit(&mut self, x: &ArrayView2<f64>) -> Result<()> {
        check_not_empty(x, "x")?;

        // Reject NaN/inf up front so every downstream path can assume finite data.
        for &val in x.iter() {
            if !val.is_finite() {
                return Err(crate::error::TransformError::DataValidationError(
                    "Data contains non-finite values".to_string(),
                ));
            }
        }

        let (n_samples, n_features) = x.dim();

        self.strategy =
            PerfUtils::choose_processing_strategy(n_samples, n_features, self.memory_limitmb);

        match &self.strategy {
            ProcessingStrategy::OutOfCore { chunk_size } => self.fit_out_of_core(x, *chunk_size),
            ProcessingStrategy::Parallel => self.fit_parallel(x),
            ProcessingStrategy::Simd => self.fit_simd(x),
            ProcessingStrategy::Standard => self.fit_standard(x),
        }
    }
67
68 fn fit_out_of_core(&mut self, x: &ArrayView2<f64>, _chunksize: usize) -> Result<()> {
70 let (n_samples, n_features) = x.dim();
71 let chunker = DataChunker::new(self.memory_limitmb);
72
73 if self.robust {
74 return self.fit_robust_out_of_core(x);
76 }
77
78 let mut means = Array1::zeros(n_features);
80 let mut m2 = Array1::zeros(n_features); let mut count = 0;
82
83 for (start_idx, end_idx) in chunker.chunk_indices(n_samples, n_features) {
84 let chunk = x.slice(scirs2_core::ndarray::s![start_idx..end_idx, ..]);
85
86 for row in chunk.rows().into_iter() {
87 count += 1;
88 let delta = &row - &means;
89 means = &means + &delta / count as f64;
90 let delta2 = &row - &means;
91 m2 = &m2 + &delta * &delta2;
92 }
93 }
94
95 let variances = if count > 1 {
96 &m2 / (count - 1) as f64
97 } else {
98 Array1::ones(n_features)
99 };
100
101 let stds = variances.mapv(|v| if v > 1e-15 { v.sqrt() } else { 1.0 });
102
103 self.means = Some(means);
104 self.stds = Some(stds);
105
106 Ok(())
107 }
108
    /// Fits statistics with one parallel task per feature column.
    ///
    /// Robust mode delegates column medians/MADs to `StatUtils`; otherwise
    /// per-column means and sample standard deviations (ddof = 1) are computed
    /// in parallel.
    fn fit_parallel(&mut self, x: &ArrayView2<f64>) -> Result<()> {
        let (_, n_features) = x.dim();

        if self.robust {
            let (medians, mads) = StatUtils::robust_stats_columns(x)?;
            // 1.4826 ≈ 1/Φ⁻¹(0.75): rescales the MAD to estimate a Gaussian std dev.
            let stds = mads.mapv(|mad| if mad > 1e-15 { mad * 1.4826 } else { 1.0 });
            self.means = Some(medians);
            self.stds = Some(stds);
        } else {
            // NOTE(review): `.mean()` here appears to resolve through the statrs
            // `Statistics` trait (plain f64), not ndarray's Option-returning
            // `mean` — confirm against the trait imports.
            let means: Result<Array1<f64>> = (0..n_features)
                .into_par_iter()
                .map(|j| {
                    let col = x.column(j);
                    Ok(col.mean())
                })
                .collect::<Result<Vec<_>>>()
                .map(Array1::from_vec);
            let means = means?;

            // Sample std per column; `.max(1)` guards the single-row case.
            let stds: Result<Array1<f64>> = (0..n_features)
                .into_par_iter()
                .map(|j| {
                    let col = x.column(j);
                    let mean = means[j];
                    let var = col.iter().map(|&val| (val - mean).powi(2)).sum::<f64>()
                        / (col.len() - 1).max(1) as f64;
                    Ok(if var > 1e-15 { var.sqrt() } else { 1.0 })
                })
                .collect::<Result<Vec<_>>>()
                .map(Array1::from_vec);
            let stds = stds?;

            self.means = Some(means);
            self.stds = Some(stds);
        }

        Ok(())
    }
151
152 fn fit_simd(&mut self, x: &ArrayView2<f64>) -> Result<()> {
154 let means = x.mean_axis(Axis(0)).expect("Operation failed");
156
157 let (_n_samples, n_features) = x.dim();
159 let mut variances = Array1::zeros(n_features);
160
161 for j in 0..n_features {
163 let col = x.column(j);
164 let mean = means[j];
165
166 let variance = if col.len() > 1 {
167 let sum_sq_diff = col.iter().map(|&val| (val - mean).powi(2)).sum::<f64>();
168 sum_sq_diff / (col.len() - 1) as f64
169 } else {
170 1.0
171 };
172
173 variances[j] = variance;
174 }
175
176 let stds = variances.mapv(|v| if v > 1e-15 { v.sqrt() } else { 1.0 });
177
178 self.means = Some(means);
179 self.stds = Some(stds);
180
181 Ok(())
182 }
183
184 fn fit_standard(&mut self, x: &ArrayView2<f64>) -> Result<()> {
186 if self.robust {
187 let (medians, mads) = StatUtils::robust_stats_columns(x)?;
188 let stds = mads.mapv(|mad| if mad > 1e-15 { mad * 1.4826 } else { 1.0 });
189 self.means = Some(medians);
190 self.stds = Some(stds);
191 } else {
192 let means = x.mean_axis(Axis(0)).expect("Operation failed");
193 let stds = x.std_axis(Axis(0), 0.0);
194 let stds = stds.mapv(|s| if s > 1e-15 { s } else { 1.0 });
195
196 self.means = Some(means);
197 self.stds = Some(stds);
198 }
199
200 Ok(())
201 }
202
203 fn fit_robust_out_of_core(&mut self, x: &ArrayView2<f64>) -> Result<()> {
205 let (_, n_features) = x.dim();
207 let chunker = DataChunker::new(self.memory_limitmb);
208
209 let mut medians = Array1::zeros(n_features);
210 let mut mads = Array1::zeros(n_features);
211
212 for j in 0..n_features {
213 let mut column_data = Vec::new();
214
215 for (start_idx, end_idx) in chunker.chunk_indices(x.nrows(), 1) {
217 let chunk = x.slice(scirs2_core::ndarray::s![start_idx..end_idx, j..j + 1]);
218 column_data.extend(chunk.iter().copied());
219 }
220
221 let col_array = Array1::from_vec(column_data);
222 let (median, mad) = StatUtils::robust_stats(&col_array.view())?;
223
224 medians[j] = median;
225 mads[j] = if mad > 1e-15 { mad * 1.4826 } else { 1.0 };
226 }
227
228 self.means = Some(medians);
229 self.stds = Some(mads);
230
231 Ok(())
232 }
233
    /// Standardizes `x` with the fitted statistics: `(x - mean) / std`.
    ///
    /// Validates fit state, input finiteness, and the feature count, then
    /// dispatches to the routine matching the strategy chosen during `fit`.
    ///
    /// # Errors
    /// `NotFitted` before `fit`; `DataValidationError` for non-finite input;
    /// `InvalidInput` on a feature-count mismatch.
    pub fn transform(&self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
        let means = self
            .means
            .as_ref()
            .ok_or_else(|| TransformError::NotFitted("StandardScaler not fitted".to_string()))?;
        let stds = self
            .stds
            .as_ref()
            .ok_or_else(|| TransformError::NotFitted("StandardScaler not fitted".to_string()))?;

        check_not_empty(x, "x")?;

        // Reject NaN/inf so the arithmetic below stays well-defined.
        for &val in x.iter() {
            if !val.is_finite() {
                return Err(crate::error::TransformError::DataValidationError(
                    "Data contains non-finite values".to_string(),
                ));
            }
        }

        let (_n_samples, n_features) = x.dim();

        if n_features != means.len() {
            return Err(TransformError::InvalidInput(format!(
                "Number of features {} doesn't match fitted features {}",
                n_features,
                means.len()
            )));
        }

        match &self.strategy {
            ProcessingStrategy::OutOfCore { chunk_size } => {
                self.transform_out_of_core(x, means, stds, *chunk_size)
            }
            ProcessingStrategy::Parallel => self.transform_parallel(x, means, stds),
            ProcessingStrategy::Simd => self.transform_simd(x, means, stds),
            ProcessingStrategy::Standard => self.transform_standard(x, means, stds),
        }
    }
275
276 fn transform_out_of_core(
278 &self,
279 x: &ArrayView2<f64>,
280 means: &Array1<f64>,
281 stds: &Array1<f64>,
282 _chunk_size: usize,
283 ) -> Result<Array2<f64>> {
284 let (n_samples, n_features) = x.dim();
285 let mut result = Array2::zeros((n_samples, n_features));
286
287 let chunker = DataChunker::new(self.memory_limitmb);
288
289 for (start_idx, end_idx) in chunker.chunk_indices(n_samples, n_features) {
290 let chunk = x.slice(scirs2_core::ndarray::s![start_idx..end_idx, ..]);
291 let transformed_chunk =
292 (&chunk - &means.view().insert_axis(Axis(0))) / stds.view().insert_axis(Axis(0));
293
294 result
295 .slice_mut(scirs2_core::ndarray::s![start_idx..end_idx, ..])
296 .assign(&transformed_chunk);
297 }
298
299 Ok(result)
300 }
301
    /// Standardizes column-by-column, parallelizing the element-wise work
    /// within each column via `par_azip!`.
    fn transform_parallel(
        &self,
        x: &ArrayView2<f64>,
        means: &Array1<f64>,
        stds: &Array1<f64>,
    ) -> Result<Array2<f64>> {
        let (n_samples, n_features) = x.dim();
        let mut result = Array2::zeros((n_samples, n_features));

        // Pair each (mean, std) with the matching output column; `j` indexes
        // the corresponding input column.
        for (j, ((mean, std), col)) in means
            .iter()
            .zip(stds.iter())
            .zip(result.columns_mut())
            .enumerate()
        {
            let x_col = x.column(j);
            par_azip!((out in col, &inp in x_col) {
                *out = (inp - mean) / std;
            });
        }

        Ok(result)
    }
327
328 fn transform_simd(
330 &self,
331 x: &ArrayView2<f64>,
332 means: &Array1<f64>,
333 stds: &Array1<f64>,
334 ) -> Result<Array2<f64>> {
335 let centered = x - &means.view().insert_axis(Axis(0));
336 let result = ¢ered / &stds.view().insert_axis(Axis(0));
337 Ok(result)
338 }
339
340 fn transform_standard(
342 &self,
343 x: &ArrayView2<f64>,
344 means: &Array1<f64>,
345 stds: &Array1<f64>,
346 ) -> Result<Array2<f64>> {
347 let result = (x - &means.view().insert_axis(Axis(0))) / stds.view().insert_axis(Axis(0));
348 Ok(result)
349 }
350
    /// Convenience: `fit` followed by `transform` on the same data.
    pub fn fit_transform(&mut self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
        self.fit(x)?;
        self.transform(x)
    }
356
    /// Fitted per-feature centers (means, or medians in robust mode), if fitted.
    pub fn means(&self) -> Option<&Array1<f64>> {
        self.means.as_ref()
    }
361
    /// Fitted per-feature scales (std devs, or scaled MADs in robust mode), if fitted.
    pub fn stds(&self) -> Option<&Array1<f64>> {
        self.stds.as_ref()
    }
366
    /// Strategy selected by the most recent `fit` (`Standard` before fitting).
    pub fn processing_strategy(&self) -> &ProcessingStrategy {
        &self.strategy
    }
371}
372
/// PCA with adaptive execution: exact covariance eigendecomposition,
/// randomized SVD for large data, or streaming incremental SVD when the data
/// exceeds the memory budget.
pub struct EnhancedPCA {
    // Number of principal components to keep.
    n_components: usize,
    // Whether to subtract the per-feature mean before projecting.
    center: bool,
    // Fitted projection matrix; None until fitted. Orientation may be either
    // (n_features, k) or (k, n_features) depending on the fitting path —
    // `transform` handles both by shape-matching.
    components: Option<Array2<f64>>,
    // Variance captured by each kept component; None until fitted.
    explained_variance: Option<Array1<f64>>,
    // Per-component fraction of total variance; set by some fitting paths.
    explained_variance_ratio: Option<Array1<f64>>,
    // Per-feature training mean (stored only when centering is enabled).
    mean: Option<Array1<f64>>,
    // Processing strategy selected during `fit`.
    strategy: ProcessingStrategy,
    // Memory budget in megabytes driving strategy and chunk-size choices.
    memory_limitmb: usize,
    // Use randomized SVD; set explicitly or enabled automatically for large data.
    use_randomized: bool,
}
394
395impl EnhancedPCA {
396 pub fn new(n_components: usize, center: bool, memory_limitmb: usize) -> Result<Self> {
398 check_positive(n_components, "n_components")?;
399
400 Ok(EnhancedPCA {
401 n_components,
402 center: true,
403 components: None,
404 explained_variance: None,
405 explained_variance_ratio: None,
406 mean: None,
407 strategy: ProcessingStrategy::Standard,
408 memory_limitmb,
409 use_randomized: false,
410 })
411 }
412
    /// Builder-style toggle for randomized SVD (also enabled automatically for
    /// very large inputs inside `fit`).
    pub fn with_randomized_svd(mut self, userandomized: bool) -> Self {
        self.use_randomized = userandomized;
        self
    }
418
    /// Fits the PCA model to `x`.
    ///
    /// Validates the input, checks `n_components <= min(n_samples, n_features)`,
    /// picks a processing strategy, force-enables randomized SVD for very large
    /// data, and dispatches to the matching fitting routine.
    ///
    /// # Errors
    /// `DataValidationError` for non-finite input; `InvalidInput` when
    /// `n_components` exceeds `min(n_samples, n_features)`.
    pub fn fit(&mut self, x: &ArrayView2<f64>) -> Result<()> {
        check_not_empty(x, "x")?;

        for &val in x.iter() {
            if !val.is_finite() {
                return Err(crate::error::TransformError::DataValidationError(
                    "Data contains non-finite values".to_string(),
                ));
            }
        }

        let (n_samples, n_features) = x.dim();

        if self.n_components > n_features.min(n_samples) {
            return Err(TransformError::InvalidInput(
                "n_components cannot be larger than min(n_samples, n_features)".to_string(),
            ));
        }

        self.strategy =
            PerfUtils::choose_processing_strategy(n_samples, n_features, self.memory_limitmb);

        // Exact eigendecomposition is impractical at this scale; switch to the
        // randomized path regardless of the caller's setting.
        if n_samples > 50000 && n_features > 1000 {
            self.use_randomized = true;
        }

        match &self.strategy {
            ProcessingStrategy::OutOfCore { chunk_size } => {
                self.fit_incremental_pca(x, *chunk_size)
            }
            _ => {
                if self.use_randomized {
                    self.fit_randomized_pca(x)
                } else {
                    self.fit_standard_pca(x)
                }
            }
        }
    }
462
    /// Out-of-core PCA: first streams over `x` to accumulate the global mean,
    /// then delegates to the streaming incremental-SVD pass.
    fn fit_incremental_pca(&mut self, x: &ArrayView2<f64>, chunksize: usize) -> Result<()> {
        let (n_samples, n_features) = x.dim();
        let chunker = DataChunker::new(self.memory_limitmb);

        let mut running_mean = Array1::<f64>::zeros(n_features);
        // NOTE(review): running variance is never accumulated; placeholder only.
        let _running_var = Array1::<f64>::zeros(n_features);
        let mut n_samples_seen = 0;

        // Weighted running mean: new = (old·n + chunk_mean·m) / (n + m).
        for (start_idx, end_idx) in chunker.chunk_indices(n_samples, n_features) {
            let chunk = x.slice(scirs2_core::ndarray::s![start_idx..end_idx, ..]);
            let chunk_mean = chunk.mean_axis(Axis(0)).expect("Operation failed");
            // Shadows the `chunksize` parameter inside this loop only.
            let chunksize = end_idx - start_idx;

            let total_samples = n_samples_seen + chunksize;
            running_mean = (running_mean * n_samples_seen as f64 + chunk_mean * chunksize as f64)
                / total_samples as f64;
            n_samples_seen = total_samples;
        }

        self.mean = if self.center {
            Some(running_mean.clone())
        } else {
            None
        };

        // Uses the outer `chunksize` parameter (the loop-local shadow is out of
        // scope here).
        self.fit_streaming_incremental_pca(x, &running_mean, chunksize)
    }
496
    /// Streaming PCA pass: feeds memory-sized chunks of (optionally centered)
    /// data into an incremental SVD, decaying older information with a
    /// forgetting factor.
    ///
    /// Populates `components` and `explained_variance_ratio` (but not
    /// `explained_variance`).
    fn fit_streaming_incremental_pca(
        &mut self,
        x: &ArrayView2<f64>,
        mean: &Array1<f64>,
        _chunk_size: usize,
    ) -> Result<()> {
        let (n_samples, n_features) = x.dim();
        let chunker = DataChunker::new(self.memory_limitmb);

        // Running truncated SVD factors: X ≈ U · diag(sigma) · Vt.
        let mut u = Array2::zeros((0, self.n_components));
        let mut sigma = Array1::zeros(self.n_components);
        let mut vt = Array2::zeros((self.n_components, n_features));

        let mut n_samples_seen = 0;
        // Exponential down-weighting of older chunks.
        let forgetting_factor = 0.95;
        for (start_idx, end_idx) in chunker.chunk_indices(n_samples, n_features) {
            let chunk = x.slice(scirs2_core::ndarray::s![start_idx..end_idx, ..]);
            let chunk_size_actual = end_idx - start_idx;

            let chunk_centered = if self.center {
                &chunk - &mean.view().insert_axis(Axis(0))
            } else {
                chunk.to_owned()
            };

            self.incremental_svd_update(
                &chunk_centered,
                &mut u,
                &mut sigma,
                &mut vt,
                n_samples_seen,
                forgetting_factor,
            )?;

            n_samples_seen += chunk_size_actual;

            // Extra decay on long streams to keep singular values bounded.
            if n_samples_seen > 10000 {
                sigma.mapv_inplace(|x| x * forgetting_factor);
            }
        }

        // Rows of Vt are the principal axes; keep the first n_components columns
        // of its transpose.
        self.components = Some(
            vt.t()
                .to_owned()
                .slice(scirs2_core::ndarray::s![.., ..self.n_components])
                .to_owned(),
        );

        // Variance ratios from squared singular values; uniform fallback when
        // the spectrum is all zero.
        let total_variance = sigma.iter().map(|&s| s * s).sum::<f64>();
        if total_variance > 0.0 {
            let variance_ratios = sigma.mapv(|s| (s * s) / total_variance);
            self.explained_variance_ratio = Some(variance_ratios);
        } else {
            self.explained_variance_ratio =
                Some(Array1::ones(self.n_components) / self.n_components as f64);
        }

        Ok(())
    }
569
    /// One block update of the running truncated SVD with a new data chunk.
    ///
    /// Projects the chunk onto the current right basis, QR-factorizes the
    /// out-of-subspace residual, builds the small augmented core matrix
    /// `[diag(sigma·√f) 0; 0 R]`, SVDs it, and truncates the factors back to
    /// `n_components`. `forgetting_factor` (f) down-weights the old spectrum.
    /// The first call (`n_samples_seen == 0`) just initializes from the
    /// chunk's SVD.
    fn incremental_svd_update(
        &self,
        new_chunk: &Array2<f64>,
        u: &mut Array2<f64>,
        sigma: &mut Array1<f64>,
        vt: &mut Array2<f64>,
        n_samples_seen: usize,
        forgetting_factor: f64,
    ) -> Result<()> {
        let (chunk_rows, n_features) = new_chunk.dim();

        if n_samples_seen == 0 {
            return self.initialize_svd_from_chunk(new_chunk, u, sigma, vt);
        }

        // Component of the chunk inside the current right subspace...
        let projected = new_chunk.dot(&vt.t());

        // ...and the residual orthogonal to it.
        let reconstructed = projected.dot(vt);
        let residual = new_chunk - &reconstructed;

        let (q_residual, r_residual) = self.qr_decomposition_chunked(&residual)?;

        // Stack the old U and the residual basis block-diagonally.
        let extended_u = if u.nrows() > 0 {
            let mut new_u = Array2::zeros((u.nrows() + chunk_rows, u.ncols() + q_residual.ncols()));
            new_u
                .slice_mut(scirs2_core::ndarray::s![..u.nrows(), ..u.ncols()])
                .assign(u);
            if q_residual.ncols() > 0 {
                new_u
                    .slice_mut(scirs2_core::ndarray::s![u.nrows().., u.ncols()..])
                    .assign(&q_residual);
            }
            new_u
        } else {
            q_residual.clone()
        };

        // Small core matrix combining the decayed old spectrum and the residual R.
        let mut augmented_sigma = Array2::zeros((
            sigma.len() + r_residual.nrows(),
            sigma.len() + r_residual.ncols(),
        ));

        for (i, &s) in sigma.iter().enumerate() {
            // √f so the squared (variance-like) weight decays by f.
            augmented_sigma[[i, i]] = s * forgetting_factor.sqrt();
        }

        if r_residual.nrows() > 0 && r_residual.ncols() > 0 {
            let start_row = sigma.len();
            let start_col = sigma.len();
            let end_row = (start_row + r_residual.nrows()).min(augmented_sigma.nrows());
            let end_col = (start_col + r_residual.ncols()).min(augmented_sigma.ncols());

            if end_row > start_row && end_col > start_col {
                augmented_sigma
                    .slice_mut(scirs2_core::ndarray::s![
                        start_row..end_row,
                        start_col..end_col
                    ])
                    .assign(&r_residual.slice(scirs2_core::ndarray::s![
                        ..(end_row - start_row),
                        ..(end_col - start_col)
                    ]));
            }
        }

        let (u_aug, sigma_new, vt_aug) = self.svd_small_matrix(&augmented_sigma)?;

        // Truncate back to the target rank.
        let k = self.n_components.min(sigma_new.len());

        *sigma = sigma_new.slice(scirs2_core::ndarray::s![..k]).to_owned();

        // Rotate the stacked left basis; guarded so mismatched shapes are
        // skipped rather than panicking.
        if extended_u.ncols() >= u_aug.nrows() && u_aug.ncols() >= k {
            *u = extended_u
                .slice(scirs2_core::ndarray::s![.., ..u_aug.nrows()])
                .dot(&u_aug.slice(scirs2_core::ndarray::s![.., ..k]));
        }

        // NOTE(review): vt is only replaced when the augmented right factor has
        // the same width as vt; the rotation of vt_aug back through the residual
        // basis is otherwise skipped — confirm this is intentional.
        if vt_aug.nrows() >= k && vt.ncols() == vt_aug.ncols() {
            *vt = vt_aug.slice(scirs2_core::ndarray::s![..k, ..]).to_owned();
        }

        Ok(())
    }
675
676 fn initialize_svd_from_chunk(
678 &self,
679 chunk: &Array2<f64>,
680 u: &mut Array2<f64>,
681 sigma: &mut Array1<f64>,
682 vt: &mut Array2<f64>,
683 ) -> Result<()> {
684 let (chunk_u, chunk_sigma, chunk_vt) = self.svd_small_matrix(chunk)?;
685
686 let k = self.n_components.min(chunk_sigma.len());
687
688 *u = chunk_u.slice(scirs2_core::ndarray::s![.., ..k]).to_owned();
689 *sigma = chunk_sigma.slice(scirs2_core::ndarray::s![..k]).to_owned();
690 *vt = chunk_vt.slice(scirs2_core::ndarray::s![..k, ..]).to_owned();
691
692 Ok(())
693 }
694
695 fn qr_decomposition_chunked(&self, matrix: &Array2<f64>) -> Result<(Array2<f64>, Array2<f64>)> {
697 let (m, n) = matrix.dim();
698
699 if m == 0 || n == 0 {
700 return Ok((Array2::zeros((m, 0)), Array2::zeros((0, n))));
701 }
702
703 let mut q = Array2::zeros((m, n.min(m)));
706 let mut r = Array2::zeros((n.min(m), n));
707
708 for j in 0..n.min(m) {
709 let mut col = matrix.column(j).to_owned();
710
711 for i in 0..j {
713 let q_col = q.column(i);
714 let proj = col.dot(&q_col);
715 col = &col - &(&q_col * proj);
716 r[[i, j]] = proj;
717 }
718
719 let norm = col.iter().map(|x| x * x).sum::<f64>().sqrt();
721 if norm > 1e-12 {
722 col /= norm;
723 r[[j, j]] = norm;
724 } else {
725 r[[j, j]] = 0.0;
726 }
727
728 q.column_mut(j).assign(&col);
729 }
730
731 Ok((q, r))
732 }
733
    /// SVD of a small dense matrix via eigendecomposition of AᵀA: singular
    /// values are √eigenvalues, V the eigenvectors, and U = A·v/σ per column.
    ///
    /// Columns whose singular value is ≤ 1e-12 are left as zero columns in U.
    /// Accuracy note: forming AᵀA squares the condition number, losing
    /// precision for ill-conditioned inputs.
    fn svd_small_matrix(
        &self,
        matrix: &Array2<f64>,
    ) -> Result<(Array2<f64>, Array1<f64>, Array2<f64>)> {
        let (m, n) = matrix.dim();

        if m == 0 || n == 0 {
            return Ok((
                Array2::zeros((m, 0)),
                Array1::zeros(0),
                Array2::zeros((0, n)),
            ));
        }

        let ata = matrix.t().dot(matrix);
        let (eigenvals, eigenvecs) = self.symmetric_eigendecomposition(&ata)?;

        // Clamp tiny negative eigenvalues (round-off) before the square root.
        let singular_values = eigenvals.mapv(|x| x.max(0.0).sqrt());

        let vt = eigenvecs.t().to_owned();

        let mut u = Array2::zeros((m, eigenvals.len()));
        for (i, &sigma) in singular_values.iter().enumerate() {
            if sigma > 1e-12 {
                let v_col = eigenvecs.column(i);
                let u_col = matrix.dot(&v_col) / sigma;
                u.column_mut(i).assign(&u_col);
            }
        }

        Ok((u, singular_values, vt))
    }
774
    /// Eigendecomposition of a symmetric matrix.
    ///
    /// 1×1 and 2×2 cases are solved in closed form; larger matrices use power
    /// iteration with Hotelling deflation, computing only the top
    /// `min(n, n_components)` eigenpairs — remaining eigenvalues stay zero and
    /// their eigenvectors stay identity columns.
    fn symmetric_eigendecomposition(
        &self,
        matrix: &Array2<f64>,
    ) -> Result<(Array1<f64>, Array2<f64>)> {
        let n = matrix.nrows();
        if n != matrix.ncols() {
            return Err(TransformError::ComputationError(
                "Matrix must be square".to_string(),
            ));
        }

        if n == 0 {
            return Ok((Array1::zeros(0), Array2::zeros((0, 0))));
        }

        let a = matrix.clone();
        let mut eigenvals = Array1::zeros(n);
        let mut eigenvecs = Array2::eye(n);

        // Trivial 1×1 case.
        if n == 1 {
            eigenvals[0] = a[[0, 0]];
            return Ok((eigenvals, eigenvecs));
        }

        // Closed-form 2×2 case via the characteristic polynomial.
        if n == 2 {
            let trace = a[[0, 0]] + a[[1, 1]];
            let det = a[[0, 0]] * a[[1, 1]] - a[[0, 1]] * a[[1, 0]];
            let discriminant = (trace * trace - 4.0 * det).sqrt();

            eigenvals[0] = (trace + discriminant) / 2.0;
            eigenvals[1] = (trace - discriminant) / 2.0;

            // Off-diagonal ≈ 0 means the matrix is already diagonal; keep
            // identity eigenvectors in that case.
            if a[[0, 1]].abs() > 1e-12 {
                let norm0 = (a[[0, 1]] * a[[0, 1]] + (eigenvals[0] - a[[0, 0]]).powi(2)).sqrt();
                eigenvecs[[0, 0]] = a[[0, 1]] / norm0;
                eigenvecs[[1, 0]] = (eigenvals[0] - a[[0, 0]]) / norm0;

                // Second eigenvector: the orthogonal complement in 2D.
                eigenvecs[[0, 1]] = -eigenvecs[[1, 0]];
                eigenvecs[[1, 1]] = eigenvecs[[0, 0]];
            }

            // Order eigenpairs by decreasing eigenvalue.
            if eigenvals[1] > eigenvals[0] {
                eigenvals.swap(0, 1);
                let temp0 = eigenvecs.column(0).to_owned();
                let temp1 = eigenvecs.column(1).to_owned();
                eigenvecs.column_mut(0).assign(&temp1);
                eigenvecs.column_mut(1).assign(&temp0);
            }

            return Ok((eigenvals, eigenvecs));
        }

        // General case: power iteration + deflation on a working copy.
        let mut matrix_work = a.clone();

        for i in 0..n.min(self.n_components) {
            // Start from a normalized all-ones vector.
            let mut v = Array1::<f64>::ones(n);
            v /= v.dot(&v).sqrt();

            let mut eigenval = 0.0;

            for _iter in 0..1000 {
                let new_v = matrix_work.dot(&v);
                // Rayleigh-quotient estimate (v is unit-norm).
                eigenval = v.dot(&new_v);
                let norm = new_v.dot(&new_v).sqrt();

                if norm < 1e-15 {
                    break;
                }

                let new_v_normalized = &new_v / norm;

                let diff = (&new_v_normalized - &v)
                    .dot(&(&new_v_normalized - &v))
                    .sqrt();
                v = new_v_normalized;

                if diff < 1e-12 {
                    break;
                }
            }

            eigenvals[i] = eigenval;
            eigenvecs.column_mut(i).assign(&v);

            // Hotelling deflation: subtract λ·vvᵀ so the next iteration
            // converges to the next eigenpair.
            let vv = v
                .view()
                .insert_axis(Axis(1))
                .dot(&v.view().insert_axis(Axis(0)));
            matrix_work = matrix_work - eigenval * vv;
        }

        Ok((eigenvals, eigenvecs))
    }
882
883 fn fit_randomized_pca(&mut self, x: &ArrayView2<f64>) -> Result<()> {
887 let _n_samples_n_features = x.dim();
888
889 let mean = if self.center {
891 Some(x.mean_axis(Axis(0)).expect("Operation failed"))
892 } else {
893 None
894 };
895
896 let x_centered = if let Some(ref m) = mean {
897 x - &m.view().insert_axis(Axis(0))
898 } else {
899 x.to_owned()
900 };
901
902 self.mean = mean;
903
904 self.fit_randomized_svd(&x_centered.view())
907 }
908
909 fn fit_randomized_svd(&mut self, x: &ArrayView2<f64>) -> Result<()> {
912 let (n_samples, n_features) = x.dim();
913
914 let oversampling = if n_features > 1000 { 10 } else { 5 };
916 let sketch_size = (self.n_components + oversampling).min(n_features.min(n_samples));
917
918 let n_power_iterations = if n_features > 5000 { 2 } else { 1 };
920
921 let random_matrix = self.generate_random_gaussian_matrix(n_features, sketch_size)?;
924
925 let mut y = x.dot(&random_matrix);
927
928 for _ in 0..n_power_iterations {
931 let xty = x.t().dot(&y);
933 y = x.dot(&xty);
934 }
935
936 let (q, r) = self.qr_decomposition_chunked(&y)?;
938
939 let b = q.t().dot(x);
942
943 let (u_b, sigma, vt) = self.svd_small_matrix(&b)?;
945
946 let _u = q.dot(&u_b);
949
950 let k = self.n_components.min(sigma.len());
953
954 let components = vt.slice(scirs2_core::ndarray::s![..k, ..]).t().to_owned();
956 self.components = Some(components.t().to_owned());
957
958 let total_variance = sigma.iter().take(k).map(|&s| s * s).sum::<f64>();
960 if total_variance > 0.0 {
961 let explained_variance = sigma.slice(scirs2_core::ndarray::s![..k]).mapv(|s| s * s);
962 let variance_ratios = &explained_variance / total_variance;
963 self.explained_variance_ratio = Some(variance_ratios);
964 self.explained_variance = Some(explained_variance);
965 } else {
966 let uniform_variance = Array1::ones(k) / k as f64;
967 self.explained_variance_ratio = Some(uniform_variance.clone());
968 self.explained_variance = Some(uniform_variance);
969 }
970
971 Ok(())
972 }
973
    /// Builds a `rows × cols` random sketching matrix with standard-normal
    /// entries (Box–Muller transform), then normalizes each column to unit
    /// length.
    fn generate_random_gaussian_matrix(&self, rows: usize, cols: usize) -> Result<Array2<f64>> {
        let mut rng = scirs2_core::random::rng();
        let mut random_matrix = Array2::zeros((rows, cols));

        for i in 0..rows {
            for j in 0..cols {
                let u1 = rng.random_range(0.0..1.0);
                let u2 = rng.random_range(0.0..1.0);

                // ln(0) would be -inf; nudge u1 away from zero.
                let u1 = if u1 == 0.0 { f64::EPSILON } else { u1 };

                // Box–Muller: z ~ N(0, 1).
                let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
                random_matrix[[i, j]] = z;
            }
        }

        // Unit-norm columns.
        for j in 0..cols {
            let mut col = random_matrix.column_mut(j);
            let norm = col.dot(&col).sqrt();
            if norm > f64::EPSILON {
                col /= norm;
            }
        }

        Ok(random_matrix)
    }
1005
1006 fn fit_standard_pca(&mut self, x: &ArrayView2<f64>) -> Result<()> {
1008 let mean = if self.center {
1010 Some(x.mean_axis(Axis(0)).expect("Operation failed"))
1011 } else {
1012 None
1013 };
1014
1015 let x_centered = if let Some(ref m) = mean {
1016 x - &m.view().insert_axis(Axis(0))
1017 } else {
1018 x.to_owned()
1019 };
1020
1021 self.mean = mean;
1022 self.fit_standard_pca_on_data(&x_centered.view())
1023 }
1024
1025 fn fit_standard_pca_on_data(&mut self, x: &ArrayView2<f64>) -> Result<()> {
1027 let (n_samples, n_features) = x.dim();
1028
1029 let cov = if n_samples > n_features {
1031 let xt = x.t();
1033 xt.dot(x) / (n_samples - 1) as f64
1034 } else {
1035 x.dot(&x.t()) / (n_samples - 1) as f64
1037 };
1038
1039 let min_dim = n_features.min(n_samples);
1043 let n_components = self.n_components.min(min_dim);
1044
1045 let (eigenvals, eigenvecs) = self.compute_top_eigenpairs(&cov, n_components)?;
1047
1048 let mut eigen_pairs: Vec<(f64, Array1<f64>)> = eigenvals
1050 .iter()
1051 .zip(eigenvecs.columns())
1052 .map(|(&val, vec)| (val, vec.to_owned()))
1053 .collect();
1054
1055 eigen_pairs.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
1056
1057 let explained_variance = Array1::from_iter(eigen_pairs.iter().map(|(val_, _)| *val_));
1059 let mut components = Array2::zeros((n_components, cov.ncols()));
1060
1061 for (i, (_, eigenvec)) in eigen_pairs.iter().enumerate() {
1062 components.row_mut(i).assign(eigenvec);
1063 }
1064
1065 self.components = Some(components.t().to_owned());
1066 self.explained_variance = Some(explained_variance);
1067
1068 Ok(())
1069 }
1070
    /// Projects `x` onto the fitted principal components.
    ///
    /// Centers with the stored training mean when available, then multiplies
    /// by the component matrix; both stored orientations of `components` are
    /// handled by shape matching.
    ///
    /// # Errors
    /// `NotFitted` before `fit`; `DataValidationError` for non-finite input;
    /// `InvalidInput` when no orientation of the components matches `x`.
    pub fn transform(&self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
        let components = self
            .components
            .as_ref()
            .ok_or_else(|| TransformError::NotFitted("PCA not fitted".to_string()))?;

        check_not_empty(x, "x")?;

        for &val in x.iter() {
            if !val.is_finite() {
                return Err(crate::error::TransformError::DataValidationError(
                    "Data contains non-finite values".to_string(),
                ));
            }
        }

        let x_processed = if let Some(ref mean) = self.mean {
            x - &mean.view().insert_axis(Axis(0))
        } else {
            x.to_owned()
        };

        // Components may be stored as (k, n_features) or (n_features, k)
        // depending on the fitting path; pick the multiplication that conforms.
        let transformed = if components.shape()[1] == x_processed.shape()[1] {
            x_processed.dot(&components.t())
        } else if components.shape()[0] == x_processed.shape()[1] {
            x_processed.dot(components)
        } else {
            return Err(crate::error::TransformError::InvalidInput(format!(
                "Component dimensions {:?} are incompatible with data dimensions {:?}",
                components.shape(),
                x_processed.shape()
            )));
        };

        Ok(transformed)
    }
1114
    /// Convenience: `fit` followed by `transform` on the same data.
    pub fn fit_transform(&mut self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
        self.fit(x)?;
        self.transform(x)
    }
1120
1121 pub fn explained_variance_ratio(&self) -> Option<Array1<f64>> {
1123 self.explained_variance.as_ref().map(|ev| {
1124 let total_var = ev.sum();
1125 ev / total_var
1126 })
1127 }
1128
    /// Fitted component matrix, if fitted; orientation depends on the fitting
    /// path (see `transform`).
    pub fn components(&self) -> Option<&Array2<f64>> {
        self.components.as_ref()
    }
1133
    /// Strategy selected by the most recent `fit` (`Standard` before fitting).
    pub fn processing_strategy(&self) -> &ProcessingStrategy {
        &self.strategy
    }
1138
    /// Computes the top `n_components` eigenpairs of a square (assumed
    /// symmetric) matrix by repeated power iteration with deflation.
    ///
    /// # Errors
    /// `ComputationError` if the matrix is not square or power iteration
    /// degenerates.
    fn compute_top_eigenpairs(
        &self,
        matrix: &Array2<f64>,
        n_components: usize,
    ) -> Result<(Array1<f64>, Array2<f64>)> {
        let n = matrix.nrows();
        if n != matrix.ncols() {
            return Err(TransformError::ComputationError(
                "Matrix must be square for eigendecomposition".to_string(),
            ));
        }

        let mut eigenvalues = Array1::zeros(n_components);
        let mut eigenvectors = Array2::zeros((n, n_components));
        let mut working_matrix = matrix.clone();

        for k in 0..n_components {
            let (eigenval, eigenvec) = self.power_iteration(&working_matrix)?;

            eigenvalues[k] = eigenval;
            eigenvectors.column_mut(k).assign(&eigenvec);

            // Deflate: subtract λ·vvᵀ so the next dominant eigenpair emerges.
            let outer_product = &eigenvec
                .view()
                .insert_axis(Axis(1))
                .dot(&eigenvec.view().insert_axis(Axis(0)));
            working_matrix = &working_matrix - &(eigenval * outer_product);
        }

        Ok((eigenvalues, eigenvectors))
    }
1174
    /// Dominant eigenpair of a square matrix via power iteration with a
    /// Rayleigh-quotient eigenvalue estimate.
    ///
    /// Starts from a random vector, iterates up to 1000 times, and stops once
    /// the eigenvalue estimate changes by less than 1e-10.
    ///
    /// # Errors
    /// `ComputationError` if the iterate collapses to the zero vector.
    fn power_iteration(&self, matrix: &Array2<f64>) -> Result<(f64, Array1<f64>)> {
        let n = matrix.nrows();
        let max_iterations = 1000;
        let tolerance = 1e-10;

        // Random start reduces the chance of being orthogonal to the dominant
        // eigenvector.
        use scirs2_core::random::Rng;
        let mut rng = scirs2_core::random::rng();
        let mut vector: Array1<f64> =
            Array1::from_shape_fn(n, |_| rng.random_range(0.0..1.0) - 0.5);

        let norm = vector.dot(&vector).sqrt();
        if norm > f64::EPSILON {
            vector /= norm;
        } else {
            // Degenerate draw: fall back to the first basis vector.
            vector = Array1::zeros(n);
            vector[0] = 1.0;
        }

        let mut eigenvalue = 0.0;
        let mut prev_eigenvalue = 0.0;

        for iteration in 0..max_iterations {
            let new_vector = matrix.dot(&vector);

            // Rayleigh quotient vᵀAv / vᵀv.
            let numerator = vector.dot(&new_vector);
            let denominator = vector.dot(&vector);

            if denominator < f64::EPSILON {
                return Err(TransformError::ComputationError(
                    "Vector became zero during power iteration".to_string(),
                ));
            }

            eigenvalue = numerator / denominator;

            let norm = new_vector.dot(&new_vector).sqrt();
            if norm > f64::EPSILON {
                vector = new_vector / norm;
            } else {
                // A annihilates the current vector; keep the last iterate.
                break;
            }

            if iteration > 0 && (eigenvalue - prev_eigenvalue).abs() < tolerance {
                break;
            }

            prev_eigenvalue = eigenvalue;
        }

        // Final re-normalization for safety.
        let norm = vector.dot(&vector).sqrt();
        if norm > f64::EPSILON {
            vector /= norm;
        }

        Ok((eigenvalue, vector))
    }
1241
    /// Eigendecomposition via the (unshifted) QR algorithm; currently unused in
    /// the main fitting paths.
    ///
    /// Falls back to `compute_top_eigenpairs` for n > 100, where repeated full
    /// QR sweeps would be too slow. Otherwise iterates `A ← R·Q` while
    /// accumulating `Q`, stops when the off-diagonal norm drops below
    /// tolerance, and returns the top `n_components` eigenpairs sorted by
    /// decreasing eigenvalue.
    #[allow(dead_code)]
    fn qr_eigendecomposition(
        &self,
        matrix: &Array2<f64>,
        n_components: usize,
    ) -> Result<(Array1<f64>, Array2<f64>)> {
        let n = matrix.nrows();
        if n != matrix.ncols() {
            return Err(TransformError::ComputationError(
                "Matrix must be square for QR eigendecomposition".to_string(),
            ));
        }

        // Too slow for large n: delegate to power iteration + deflation.
        if n > 100 {
            return self.compute_top_eigenpairs(matrix, n_components);
        }

        let max_iterations = 100;
        let tolerance = 1e-12;
        let mut a = matrix.clone();
        let mut q_total = Array2::eye(n);

        for _iteration in 0..max_iterations {
            let (q, r) = self.qr_decomposition(&a)?;
            a = r.dot(&q);
            q_total = q_total.dot(&q);

            // Converged when A is (numerically) diagonal.
            let mut off_diagonal_norm = 0.0;
            for i in 0..n {
                for j in 0..n {
                    if i != j {
                        off_diagonal_norm += a[[i, j]] * a[[i, j]];
                    }
                }
            }

            if off_diagonal_norm.sqrt() < tolerance {
                break;
            }
        }

        // Diagonal of the converged A holds the eigenvalues; accumulated Q the
        // eigenvectors.
        let eigenvals: Vec<f64> = (0..n).map(|i| a[[i, i]]).collect();
        let eigenvecs = q_total;

        // Sort indices by decreasing eigenvalue.
        let mut indices: Vec<usize> = (0..n).collect();
        indices.sort_by(|&i, &j| {
            eigenvals[j]
                .partial_cmp(&eigenvals[i])
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        let top_eigenvals =
            Array1::from_iter(indices.iter().take(n_components).map(|&i| eigenvals[i]));

        let mut top_eigenvecs = Array2::zeros((n, n_components));
        for (k, &i) in indices.iter().take(n_components).enumerate() {
            top_eigenvecs.column_mut(k).assign(&eigenvecs.column(i));
        }

        Ok((top_eigenvals, top_eigenvecs))
    }
1310
    /// Thin QR via modified Gram–Schmidt: Q is (m, n) with orthonormal columns,
    /// R is (n, n) upper-triangular. A numerically dependent column (norm ≈ 0)
    /// yields a zero column in Q and a zero diagonal entry in R.
    #[allow(dead_code)]
    fn qr_decomposition(&self, matrix: &Array2<f64>) -> Result<(Array2<f64>, Array2<f64>)> {
        let (m, n) = matrix.dim();
        let mut q = Array2::zeros((m, n));
        let mut r = Array2::zeros((n, n));

        for j in 0..n {
            let mut v = matrix.column(j).to_owned();

            // Subtract projections onto the previously orthonormalized columns.
            for i in 0..j {
                let q_i = q.column(i);
                let projection = q_i.dot(&v);
                r[[i, j]] = projection;
                v = v - projection * &q_i;
            }

            let norm = v.dot(&v).sqrt();
            if norm > f64::EPSILON {
                r[[j, j]] = norm;
                q.column_mut(j).assign(&(&v / norm));
            } else {
                r[[j, j]] = 0.0;
                q.column_mut(j).fill(0.0);
            }
        }

        Ok((q, r))
    }
1343
    /// Full QR: extends the thin factorization to a square (m, m) Q by
    /// Gram–Schmidt-completing the remaining basis vectors; R is (m, n).
    ///
    /// NOTE(review): the slice assignments assume m >= n; for a wide matrix
    /// (m < n) the `..n` slices exceed Q's and R's extents and would panic —
    /// confirm callers only pass tall matrices.
    fn qr_decomposition_full(&self, matrix: &Array2<f64>) -> Result<(Array2<f64>, Array2<f64>)> {
        let (m, n) = matrix.dim();
        let mut q = Array2::zeros((m, m));
        let mut r = Array2::zeros((m, n));
        let (q_reduced, r_reduced) = self.qr_decomposition(matrix)?;

        q.slice_mut(scirs2_core::ndarray::s![.., ..n])
            .assign(&q_reduced);

        r.slice_mut(scirs2_core::ndarray::s![..n, ..])
            .assign(&r_reduced);

        // Complete the orthonormal basis from the remaining standard basis
        // vectors, orthogonalized against the existing columns.
        for j in n..m {
            let mut v = Array1::zeros(m);
            v[j] = 1.0;
            for i in 0..j {
                let q_i = q.column(i);
                let projection = q_i.dot(&v);
                v = v - projection * &q_i;
            }

            let norm = v.dot(&v).sqrt();
            if norm > f64::EPSILON {
                q.column_mut(j).assign(&(&v / norm));
            }
        }

        Ok((q, r))
    }
1382}
1383
/// Object pool that recycles `Array2`/`Array1` buffers to reduce allocator
/// churn in repeated transforms, with usage statistics for adaptive sizing.
pub struct AdvancedMemoryPool {
    /// Reusable matrices, keyed by exact (rows, cols) shape.
    matrix_pools: std::collections::HashMap<(usize, usize), Vec<Array2<f64>>>,
    /// Reusable vectors, keyed by exact length.
    vector_pools: std::collections::HashMap<usize, Vec<Array1<f64>>>,
    /// Cap on pooled matrices retained per distinct shape.
    max_matrices_per_size: usize,
    /// Cap on pooled vectors retained per distinct length.
    max_vectors_per_size: usize,
    /// Allocation, hit-rate, memory, and timing counters.
    stats: PoolStats,
}
1398
/// Counters describing pool usage, memory footprint, and transform throughput.
#[derive(Debug, Clone)]
pub struct PoolStats {
    /// Total `get_matrix`/`get_vector` requests made against the pool.
    pub total_allocations: usize,
    /// Requests served by recycling a pooled buffer.
    pub pool_hits: usize,
    /// Requests that fell back to a fresh allocation.
    pub pool_misses: usize,
    /// Current memory held by pooled buffers, in MB (f64 = 8 bytes/element).
    pub total_memory_mb: f64,
    /// High-water mark of `total_memory_mb`.
    pub peak_memory_mb: f64,
    /// Number of matrices currently sitting in the pool.
    pub current_matrices: usize,
    /// Number of vectors currently sitting in the pool.
    pub current_vectors: usize,
    /// Number of transforms recorded via `update_stats`.
    pub transform_count: u64,
    /// Accumulated wall time of recorded transforms, in nanoseconds.
    pub total_transform_time_ns: u64,
    /// Derived throughput estimate based on the average transform time.
    pub throughput_samples_per_sec: f64,
    /// Same value as `efficiency()`: pool_hits / total_allocations.
    pub cache_hit_rate: f64,
}
1425
1426impl AdvancedMemoryPool {
1427 pub fn new(max_matrices: usize, max_vectors: usize, initialcapacity: usize) -> Self {
1429 let mut pool = AdvancedMemoryPool {
1430 matrix_pools: std::collections::HashMap::with_capacity(initialcapacity),
1431 vector_pools: std::collections::HashMap::with_capacity(initialcapacity),
1432 max_matrices_per_size: max_matrices,
1433 max_vectors_per_size: max_vectors,
1434 stats: PoolStats {
1435 total_allocations: 0,
1436 pool_hits: 0,
1437 pool_misses: 0,
1438 total_memory_mb: 0.0,
1439 peak_memory_mb: 0.0,
1440 current_matrices: 0,
1441 current_vectors: 0,
1442 transform_count: 0,
1443 total_transform_time_ns: 0,
1444 throughput_samples_per_sec: 0.0,
1445 cache_hit_rate: 0.0,
1446 },
1447 };
1448
1449 pool.prewarm_common_sizes();
1451 pool
1452 }
1453
1454 fn prewarm_common_sizes(&mut self) {
1456 let common_matrix_sizes = vec![
1458 (100, 10),
1459 (500, 20),
1460 (1000, 50),
1461 (5000, 100),
1462 (10000, 200),
1463 (50000, 500),
1464 ];
1465
1466 for (rows, cols) in common_matrix_sizes {
1467 let pool = self.matrix_pools.entry((rows, cols)).or_default();
1468 for _ in 0..(self.max_matrices_per_size / 4) {
1469 pool.push(Array2::zeros((rows, cols)));
1470 self.stats.current_matrices += 1;
1471 }
1472 }
1473
1474 let common_vector_sizes = vec![10, 20, 50, 100, 200, 500, 1000, 5000];
1476 for size in common_vector_sizes {
1477 let pool = self.vector_pools.entry(size).or_default();
1478 for _ in 0..(self.max_vectors_per_size / 4) {
1479 pool.push(Array1::zeros(size));
1480 self.stats.current_vectors += 1;
1481 }
1482 }
1483
1484 self.update_memory_stats();
1485 }
1486
1487 pub fn get_matrix(&mut self, rows: usize, cols: usize) -> Array2<f64> {
1489 self.stats.total_allocations += 1;
1490
1491 if let Some(pool) = self.matrix_pools.get_mut(&(rows, cols)) {
1492 if let Some(mut matrix) = pool.pop() {
1493 matrix.fill(0.0);
1495 self.stats.pool_hits += 1;
1496 self.stats.current_matrices -= 1;
1497 return matrix;
1498 }
1499 }
1500
1501 self.stats.pool_misses += 1;
1503 Array2::zeros((rows, cols))
1504 }
1505
1506 pub fn get_vector(&mut self, size: usize) -> Array1<f64> {
1508 self.stats.total_allocations += 1;
1509
1510 if let Some(pool) = self.vector_pools.get_mut(&size) {
1511 if let Some(mut vector) = pool.pop() {
1512 vector.fill(0.0);
1514 self.stats.pool_hits += 1;
1515 self.stats.current_vectors -= 1;
1516 return vector;
1517 }
1518 }
1519
1520 self.stats.pool_misses += 1;
1522 Array1::zeros(size)
1523 }
1524
1525 pub fn return_matrix(&mut self, matrix: Array2<f64>) {
1527 let shape = (matrix.nrows(), matrix.ncols());
1528 let pool = self.matrix_pools.entry(shape).or_default();
1529
1530 if pool.len() < self.max_matrices_per_size {
1531 pool.push(matrix);
1532 self.stats.current_matrices += 1;
1533 self.update_memory_stats();
1534 }
1535 }
1536
1537 pub fn return_vector(&mut self, vector: Array1<f64>) {
1539 let size = vector.len();
1540 let pool = self.vector_pools.entry(size).or_default();
1541
1542 if pool.len() < self.max_vectors_per_size {
1543 pool.push(vector);
1544 self.stats.current_vectors += 1;
1545 self.update_memory_stats();
1546 }
1547 }
1548
1549 fn update_memory_stats(&mut self) {
1551 let mut total_memory = 0.0;
1552
1553 for ((rows, cols), pool) in &self.matrix_pools {
1555 total_memory += (rows * cols * 8 * pool.len()) as f64; }
1557
1558 for (size, pool) in &self.vector_pools {
1560 total_memory += (size * 8 * pool.len()) as f64; }
1562
1563 self.stats.total_memory_mb = total_memory / (1024.0 * 1024.0);
1564 if self.stats.total_memory_mb > self.stats.peak_memory_mb {
1565 self.stats.peak_memory_mb = self.stats.total_memory_mb;
1566 }
1567
1568 self.update_cache_hit_rate();
1570 }
1571
1572 pub fn stats(&self) -> &PoolStats {
1574 &self.stats
1575 }
1576
1577 pub fn efficiency(&self) -> f64 {
1579 if self.stats.total_allocations == 0 {
1580 0.0
1581 } else {
1582 self.stats.pool_hits as f64 / self.stats.total_allocations as f64
1583 }
1584 }
1585
1586 fn update_cache_hit_rate(&mut self) {
1588 self.stats.cache_hit_rate = self.efficiency();
1589 }
1590
1591 pub fn update_stats(&mut self, transform_time_ns: u64, samplesprocessed: usize) {
1593 self.stats.transform_count += 1;
1594 self.stats.total_transform_time_ns += transform_time_ns;
1595
1596 if self.stats.transform_count > 0 {
1597 let avg_time_per_transform =
1598 self.stats.total_transform_time_ns / self.stats.transform_count;
1599 if avg_time_per_transform > 0 {
1600 self.stats.throughput_samples_per_sec =
1601 (samplesprocessed as f64) / (avg_time_per_transform as f64 / 1_000_000_000.0);
1602 }
1603 }
1604
1605 self.update_memory_stats();
1607 }
1608
1609 pub fn clear(&mut self) {
1611 self.matrix_pools.clear();
1612 self.vector_pools.clear();
1613 self.stats.current_matrices = 0;
1614 self.stats.current_vectors = 0;
1615 self.update_memory_stats();
1616 }
1617
1618 pub fn adaptive_resize(&mut self) {
1620 let efficiency = self.efficiency();
1621
1622 if efficiency > 0.8 {
1623 self.max_matrices_per_size = (self.max_matrices_per_size as f32 * 1.2) as usize;
1625 self.max_vectors_per_size = (self.max_vectors_per_size as f32 * 1.2) as usize;
1626 } else if efficiency < 0.3 {
1627 self.max_matrices_per_size = (self.max_matrices_per_size as f32 * 0.8) as usize;
1629 self.max_vectors_per_size = (self.max_vectors_per_size as f32 * 0.8) as usize;
1630
1631 for pool in self.matrix_pools.values_mut() {
1633 pool.truncate(self.max_matrices_per_size);
1634 }
1635 for pool in self.vector_pools.values_mut() {
1636 pool.truncate(self.max_vectors_per_size);
1637 }
1638 }
1639
1640 self.update_memory_stats();
1641 }
1642
1643 pub fn get_array(&mut self, rows: usize, cols: usize) -> Array2<f64> {
1645 self.get_matrix(rows, cols)
1646 }
1647
1648 pub fn return_array(&mut self, array: Array2<f64>) {
1650 self.return_matrix(array);
1651 }
1652
1653 pub fn get_temp_array(&mut self, size: usize) -> Array1<f64> {
1655 self.get_vector(size)
1656 }
1657
1658 pub fn return_temp_array(&mut self, temp: Array1<f64>) {
1660 self.return_vector(temp);
1661 }
1662
1663 pub fn optimize(&mut self) {
1665 self.adaptive_resize();
1666 }
1667}
1668
/// PCA wrapper that adds buffer pooling and per-shape result caching around
/// `EnhancedPCA`.
pub struct AdvancedPCA {
    /// Underlying PCA implementation that performs fitting and transforms.
    enhanced_pca: EnhancedPCA,
    /// Pool of reusable matrix/vector buffers plus performance counters.
    memory_pool: AdvancedMemoryPool,
    /// Cached fit artifacts keyed by input shape (n_samples, n_features).
    processing_cache: std::collections::HashMap<(usize, usize), CachedPCAResult>,
}
1675
/// Snapshot of a fitted PCA stored per input shape for cache-validity checks.
#[derive(Clone)]
struct CachedPCAResult {
    /// Principal components at the time of caching (currently unread).
    #[allow(dead_code)]
    components: Array2<f64>,
    /// Explained-variance ratios at the time of caching (currently unread).
    #[allow(dead_code)]
    explained_variance_ratio: Array1<f64>,
    /// Sampled hash of the input used to detect repeated inputs.
    data_hash: u64,
    /// Insertion time; entries expire after a fixed age (see `cleanup_cache`).
    timestamp: std::time::Instant,
}
1686
1687impl AdvancedPCA {
1688 pub fn new(_n_components: usize, _n_samples_hint: usize, hint: usize) -> Self {
1690 let enhanced_pca = EnhancedPCA::new(_n_components, true, 1024).expect("Operation failed");
1691 let memory_pool = AdvancedMemoryPool::new(
1692 100, 200, 20, );
1696
1697 AdvancedPCA {
1698 enhanced_pca,
1699 memory_pool,
1700 processing_cache: std::collections::HashMap::new(),
1701 }
1702 }
1703
1704 pub fn fit(&mut self, x: &ArrayView2<f64>) -> Result<()> {
1706 self.enhanced_pca.fit(x)
1707 }
1708
1709 pub fn fit_transform(&mut self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
1711 self.enhanced_pca.fit_transform(x)
1712 }
1713
1714 pub fn components(&self) -> Option<&Array2<f64>> {
1716 self.enhanced_pca.components()
1717 }
1718
1719 pub fn mean(&self) -> Option<&Array1<f64>> {
1721 self.enhanced_pca.mean.as_ref()
1722 }
1723
1724 pub fn explained_variance_ratio(&self) -> Result<Array1<f64>> {
1726 self.enhanced_pca.explained_variance_ratio().ok_or_else(|| {
1727 TransformError::NotFitted(
1728 "PCA must be fitted before getting explained variance ratio".to_string(),
1729 )
1730 })
1731 }
1732
1733 pub fn fast_transform(&mut self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
1735 let (n_samples, n_features) = x.dim();
1736
1737 let data_hash = self.compute_data_hash(x);
1739 if let Some(cached) = self.processing_cache.get(&(n_samples, n_features)) {
1740 if cached.data_hash == data_hash && cached.timestamp.elapsed().as_secs() < 300 {
1741 let result = self
1743 .memory_pool
1744 .get_matrix(n_samples, self.enhanced_pca.n_components);
1745 return Ok(result);
1746 }
1747 }
1748
1749 let result = self.enhanced_pca.transform(x)?;
1751
1752 if let (Some(components), Some(explained_variance_ratio)) = (
1754 self.enhanced_pca.components().cloned(),
1755 self.enhanced_pca.explained_variance_ratio(),
1756 ) {
1757 self.processing_cache.insert(
1758 (n_samples, n_features),
1759 CachedPCAResult {
1760 components,
1761 explained_variance_ratio,
1762 data_hash,
1763 timestamp: std::time::Instant::now(),
1764 },
1765 );
1766 }
1767
1768 Ok(result)
1769 }
1770
1771 fn compute_data_hash(&self, x: &ArrayView2<f64>) -> u64 {
1773 use std::collections::hash_map::DefaultHasher;
1774 use std::hash::{Hash, Hasher};
1775
1776 let mut hasher = DefaultHasher::new();
1777
1778 x.shape().hash(&mut hasher);
1780
1781 let (n_samples, n_features) = x.dim();
1783 let sample_step = ((n_samples * n_features) / 1000).max(1);
1784
1785 for (i, &val) in x.iter().step_by(sample_step).enumerate() {
1786 if i > 1000 {
1787 break;
1788 } (val.to_bits()).hash(&mut hasher);
1790 }
1791
1792 hasher.finish()
1793 }
1794
1795 pub fn performance_stats(&self) -> &PoolStats {
1797 self.memory_pool.stats()
1798 }
1799
1800 pub fn cleanup_cache(&mut self) {
1802 let now = std::time::Instant::now();
1803 self.processing_cache.retain(|_, cached| {
1804 now.duration_since(cached.timestamp).as_secs() < 1800 });
1806 }
1807
1808 pub fn transform(&mut self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
1810 let start_time = std::time::Instant::now();
1811 let result = self.enhanced_pca.transform(x)?;
1812
1813 let duration = start_time.elapsed();
1815 let samples = x.shape()[0];
1816 self.memory_pool
1817 .update_stats(duration.as_nanos() as u64, samples);
1818
1819 Ok(result)
1820 }
1821
1822 pub fn qr_decomposition_optimized(
1824 &self,
1825 matrix: &Array2<f64>,
1826 ) -> Result<(Array2<f64>, Array2<f64>)> {
1827 self.enhanced_pca.qr_decomposition_full(matrix)
1828 }
1829
1830 pub fn svd_small_matrix(
1832 &self,
1833 matrix: &Array2<f64>,
1834 ) -> Result<(Array2<f64>, Array1<f64>, Array2<f64>)> {
1835 self.enhanced_pca.svd_small_matrix(matrix)
1836 }
1837}
1838
1839#[cfg(test)]
1840#[path = "performance_tests.rs"]
1841mod tests;