1use std::fmt;
15
16#[derive(Debug, Clone, PartialEq)]
22pub enum PcaError {
23 InsufficientData { samples: usize, features: usize },
25 InvalidComponents { requested: usize, max: usize },
27 DimensionMismatch,
29}
30
31impl fmt::Display for PcaError {
32 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33 match self {
34 PcaError::InsufficientData { samples, features } => write!(
35 f,
36 "insufficient data: {samples} samples and {features} features (need at least 2 of each)"
37 ),
38 PcaError::InvalidComponents { requested, max } => write!(
39 f,
40 "requested {requested} components but max is {max}"
41 ),
42 PcaError::DimensionMismatch => write!(f, "input dimension does not match the fitted model"),
43 }
44 }
45}
46
47impl std::error::Error for PcaError {}
48
49#[derive(Debug, Clone)]
55pub struct PcaConfig {
56 pub n_components: usize,
58 pub center: bool,
60 pub scale: bool,
62}
63
64impl Default for PcaConfig {
65 fn default() -> Self {
66 Self {
67 n_components: 2,
68 center: true,
69 scale: false,
70 }
71 }
72}
73
74#[derive(Debug, Clone)]
80pub struct PcaModel {
81 pub mean: Vec<f64>,
83 pub std_dev: Vec<f64>,
85 pub components: Vec<Vec<f64>>,
87 pub explained_variance: Vec<f64>,
89 pub n_components: usize,
91 pub n_features: usize,
93}
94
95pub fn dot(a: &[f64], b: &[f64]) -> f64 {
101 a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
102}
103
104pub fn norm(v: &[f64]) -> f64 {
106 dot(v, v).sqrt()
107}
108
109pub fn normalize(v: &mut [f64]) {
111 let n = norm(v);
112 if n > 1e-15 {
113 for x in v.iter_mut() {
114 *x /= n;
115 }
116 }
117}
118
119#[derive(Debug, Default)]
125pub struct PcaReducer;
126
127impl PcaReducer {
128 pub fn new() -> Self {
130 Self
131 }
132
133 pub fn fit(data: &[Vec<f64>], config: &PcaConfig) -> Result<PcaModel, PcaError> {
137 let n = data.len();
138 let d = data.first().map(|r| r.len()).unwrap_or(0);
139
140 if n < 2 || d < 2 {
141 return Err(PcaError::InsufficientData {
142 samples: n,
143 features: d,
144 });
145 }
146 let k = config.n_components;
147 if k == 0 || k > d {
148 return Err(PcaError::InvalidComponents {
149 requested: k,
150 max: d,
151 });
152 }
153
154 let mut mean = vec![0.0f64; d];
156 for row in data {
157 for (j, &v) in row.iter().enumerate() {
158 mean[j] += v;
159 }
160 }
161 for m in &mut mean {
162 *m /= n as f64;
163 }
164
165 let mut std_dev = vec![1.0f64; d];
167 if config.scale {
168 let mut variance = vec![0.0f64; d];
169 for row in data {
170 for (j, &v) in row.iter().enumerate() {
171 let diff = v - mean[j];
172 variance[j] += diff * diff;
173 }
174 }
175 for (j, s) in std_dev.iter_mut().enumerate() {
176 let var = variance[j] / (n as f64 - 1.0).max(1.0);
177 *s = if var > 1e-15 { var.sqrt() } else { 1.0 };
178 }
179 }
180
181 let apply_mean = if config.center {
183 &mean[..]
184 } else {
185 &vec![0.0f64; d][..]
186 };
187 let centered: Vec<Vec<f64>> = data
188 .iter()
189 .map(|row| {
190 row.iter()
191 .enumerate()
192 .map(|(j, &v)| (v - apply_mean[j]) / std_dev[j])
193 .collect()
194 })
195 .collect();
196
197 let denom = (n as f64 - 1.0).max(1.0);
199 let mut cov = vec![vec![0.0f64; d]; d];
200 for row in ¢ered {
201 #[allow(clippy::needless_range_loop)]
202 for i in 0..d {
203 for j in 0..d {
204 cov[i][j] += row[i] * row[j];
205 }
206 }
207 }
208 #[allow(clippy::needless_range_loop)]
209 for i in 0..d {
210 for j in 0..d {
211 cov[i][j] /= denom;
212 }
213 }
214
215 let mut components: Vec<Vec<f64>> = Vec::with_capacity(k);
217 let mut eigenvalues: Vec<f64> = Vec::with_capacity(k);
218 let mut deflated = cov.clone();
220
221 for comp_idx in 0..k {
222 let mut ev: Vec<f64> = (0..d).map(|j| deflated[j][j].abs()).collect();
224 normalize(&mut ev);
225 if norm(&ev) < 1e-15 {
227 ev = vec![0.0; d];
228 if comp_idx < d {
229 ev[comp_idx] = 1.0;
230 }
231 }
232
233 for _ in 0..50 {
235 let mut new_ev = vec![0.0f64; d];
237 for i in 0..d {
238 for j in 0..d {
239 new_ev[i] += deflated[i][j] * ev[j];
240 }
241 }
242 normalize(&mut new_ev);
243 ev = new_ev;
244 }
245
246 let mut av = vec![0.0f64; d];
248 for i in 0..d {
249 for j in 0..d {
250 av[i] += deflated[i][j] * ev[j];
251 }
252 }
253 let lambda = dot(&ev, &av);
254
255 for prev in &components {
257 let proj = dot(&ev, prev);
258 for j in 0..d {
259 ev[j] -= proj * prev[j];
260 }
261 }
262 normalize(&mut ev);
263
264 eigenvalues.push(lambda.max(0.0));
265 components.push(ev.clone());
266
267 for i in 0..d {
269 for j in 0..d {
270 deflated[i][j] -= lambda * ev[i] * ev[j];
271 }
272 }
273 }
274
275 let total_var: f64 = eigenvalues.iter().sum();
277 let explained_variance_ratio: Vec<f64> = if total_var > 1e-15 {
278 eigenvalues.iter().map(|&e| e / total_var).collect()
279 } else {
280 vec![1.0 / k as f64; k]
281 };
282
283 Ok(PcaModel {
284 mean,
285 std_dev,
286 components,
287 explained_variance: explained_variance_ratio,
288 n_components: k,
289 n_features: d,
290 })
291 }
292
293 pub fn transform(model: &PcaModel, data: &[Vec<f64>]) -> Result<Vec<Vec<f64>>, PcaError> {
295 let d = model.n_features;
296 let k = model.n_components;
297 let mut out = Vec::with_capacity(data.len());
298 for row in data {
299 if row.len() != d {
300 return Err(PcaError::DimensionMismatch);
301 }
302 let centered: Vec<f64> = row
304 .iter()
305 .enumerate()
306 .map(|(j, &v)| (v - model.mean[j]) / model.std_dev[j])
307 .collect();
308 let projected: Vec<f64> = (0..k)
310 .map(|c| dot(¢ered, &model.components[c]))
311 .collect();
312 out.push(projected);
313 }
314 Ok(out)
315 }
316
317 pub fn fit_transform(
319 data: &[Vec<f64>],
320 config: &PcaConfig,
321 ) -> Result<(PcaModel, Vec<Vec<f64>>), PcaError> {
322 let model = Self::fit(data, config)?;
323 let reduced = Self::transform(&model, data)?;
324 Ok((model, reduced))
325 }
326
327 pub fn inverse_transform(
329 model: &PcaModel,
330 reduced: &[Vec<f64>],
331 ) -> Result<Vec<Vec<f64>>, PcaError> {
332 let k = model.n_components;
333 let d = model.n_features;
334 let mut out = Vec::with_capacity(reduced.len());
335 for row in reduced {
336 if row.len() != k {
337 return Err(PcaError::DimensionMismatch);
338 }
339 let mut rec = vec![0.0f64; d];
341 for (c, &coeff) in row.iter().enumerate() {
342 #[allow(clippy::needless_range_loop)]
343 for j in 0..d {
344 rec[j] += coeff * model.components[c][j];
345 }
346 }
347 #[allow(clippy::needless_range_loop)]
349 for j in 0..d {
350 rec[j] = rec[j] * model.std_dev[j] + model.mean[j];
351 }
352 out.push(rec);
353 }
354 Ok(out)
355 }
356
357 pub fn explained_variance_ratio(model: &PcaModel) -> Vec<f64> {
359 model.explained_variance.clone()
360 }
361
362 pub fn cumulative_explained_variance(model: &PcaModel) -> Vec<f64> {
364 let mut cum = 0.0;
365 model
366 .explained_variance
367 .iter()
368 .map(|&v| {
369 cum += v;
370 cum
371 })
372 .collect()
373 }
374}
375
376#[cfg(test)]
381mod tests {
382 use super::*;
383
384 fn axis_aligned_2d() -> Vec<Vec<f64>> {
386 vec![
387 vec![3.0, 0.0],
388 vec![-3.0, 0.0],
389 vec![3.0, 0.1],
390 vec![-3.0, -0.1],
391 vec![3.0, 0.05],
392 vec![-3.0, -0.05],
393 ]
394 }
395
396 fn data_3d() -> Vec<Vec<f64>> {
398 vec![
399 vec![1.0, 2.0, 0.01],
400 vec![-1.0, -2.0, 0.01],
401 vec![2.0, 4.0, -0.01],
402 vec![-2.0, -4.0, -0.01],
403 vec![0.5, 1.0, 0.005],
404 vec![-0.5, -1.0, -0.005],
405 ]
406 }
407
408 #[test]
411 fn test_fit_insufficient_samples() {
412 let data = vec![vec![1.0, 2.0]];
413 let config = PcaConfig {
414 n_components: 1,
415 center: true,
416 scale: false,
417 };
418 assert!(matches!(
419 PcaReducer::fit(&data, &config),
420 Err(PcaError::InsufficientData { .. })
421 ));
422 }
423
424 #[test]
425 fn test_fit_insufficient_features() {
426 let data = vec![vec![1.0], vec![2.0], vec![3.0]];
427 let config = PcaConfig {
428 n_components: 1,
429 center: true,
430 scale: false,
431 };
432 assert!(matches!(
433 PcaReducer::fit(&data, &config),
434 Err(PcaError::InsufficientData { .. })
435 ));
436 }
437
438 #[test]
439 fn test_fit_too_many_components() {
440 let data = axis_aligned_2d();
441 let config = PcaConfig {
442 n_components: 5,
443 center: true,
444 scale: false,
445 };
446 assert!(matches!(
447 PcaReducer::fit(&data, &config),
448 Err(PcaError::InvalidComponents { .. })
449 ));
450 }
451
452 #[test]
453 fn test_fit_zero_components() {
454 let data = axis_aligned_2d();
455 let config = PcaConfig {
456 n_components: 0,
457 center: true,
458 scale: false,
459 };
460 assert!(matches!(
461 PcaReducer::fit(&data, &config),
462 Err(PcaError::InvalidComponents { .. })
463 ));
464 }
465
466 #[test]
467 fn test_transform_dimension_mismatch() {
468 let data = axis_aligned_2d();
469 let config = PcaConfig::default();
470 let model = PcaReducer::fit(&data, &config).expect("fit ok");
471 let bad = vec![vec![1.0, 2.0, 3.0]]; assert_eq!(
473 PcaReducer::transform(&model, &bad),
474 Err(PcaError::DimensionMismatch)
475 );
476 }
477
478 #[test]
479 fn test_inverse_transform_dimension_mismatch() {
480 let data = axis_aligned_2d();
481 let config = PcaConfig::default();
482 let model = PcaReducer::fit(&data, &config).expect("fit ok");
483 let bad = vec![vec![1.0, 2.0, 3.0]]; assert_eq!(
485 PcaReducer::inverse_transform(&model, &bad),
486 Err(PcaError::DimensionMismatch)
487 );
488 }
489
490 #[test]
493 fn test_error_display_insufficient() {
494 let e = PcaError::InsufficientData {
495 samples: 1,
496 features: 1,
497 };
498 assert!(e.to_string().contains("insufficient"));
499 }
500
501 #[test]
502 fn test_error_display_invalid_components() {
503 let e = PcaError::InvalidComponents {
504 requested: 5,
505 max: 2,
506 };
507 assert!(e.to_string().contains("5"));
508 }
509
510 #[test]
511 fn test_error_display_dimension_mismatch() {
512 let e = PcaError::DimensionMismatch;
513 assert!(e.to_string().contains("dimension"));
514 }
515
516 #[test]
517 fn test_error_is_std_error() {
518 let e: Box<dyn std::error::Error> = Box::new(PcaError::DimensionMismatch);
519 assert!(!e.to_string().is_empty());
520 }
521
522 #[test]
525 fn test_fit_produces_model_with_correct_n_components() {
526 let data = axis_aligned_2d();
527 let config = PcaConfig {
528 n_components: 1,
529 center: true,
530 scale: false,
531 };
532 let model = PcaReducer::fit(&data, &config).expect("fit ok");
533 assert_eq!(model.n_components, 1);
534 assert_eq!(model.components.len(), 1);
535 }
536
537 #[test]
538 fn test_fit_component_is_unit_vector() {
539 let data = axis_aligned_2d();
540 let config = PcaConfig {
541 n_components: 1,
542 center: true,
543 scale: false,
544 };
545 let model = PcaReducer::fit(&data, &config).expect("fit ok");
546 let n = norm(&model.components[0]);
547 assert!((n - 1.0).abs() < 1e-9, "component should be a unit vector");
548 }
549
550 #[test]
551 fn test_fit_mean_equals_column_means() {
552 let data = axis_aligned_2d();
553 let config = PcaConfig {
554 n_components: 1,
555 center: true,
556 scale: false,
557 };
558 let model = PcaReducer::fit(&data, &config).expect("fit ok");
559 assert!(model.mean[0].abs() < 1e-9);
561 assert!(model.mean[1].abs() < 1e-9);
562 }
563
564 #[test]
565 fn test_fit_n_features_stored() {
566 let data = axis_aligned_2d();
567 let config = PcaConfig::default();
568 let model = PcaReducer::fit(&data, &config).expect("fit ok");
569 assert_eq!(model.n_features, 2);
570 }
571
572 #[test]
573 fn test_fit_explained_variance_sums_to_one() {
574 let data = data_3d();
575 let config = PcaConfig {
576 n_components: 2,
577 center: true,
578 scale: false,
579 };
580 let model = PcaReducer::fit(&data, &config).expect("fit ok");
581 let sum: f64 = model.explained_variance.iter().sum();
582 assert!(
584 (sum - 1.0).abs() < 1e-9,
585 "explained variance ratios should sum to 1.0"
586 );
587 }
588
589 #[test]
590 fn test_fit_std_dev_ones_when_no_scale() {
591 let data = axis_aligned_2d();
592 let config = PcaConfig {
593 n_components: 1,
594 center: true,
595 scale: false,
596 };
597 let model = PcaReducer::fit(&data, &config).expect("fit ok");
598 for s in &model.std_dev {
599 assert!((*s - 1.0).abs() < 1e-9);
600 }
601 }
602
603 #[test]
604 fn test_fit_scale_changes_std_dev() {
605 let data = axis_aligned_2d();
606 let config = PcaConfig {
607 n_components: 1,
608 center: true,
609 scale: true,
610 };
611 let model = PcaReducer::fit(&data, &config).expect("fit ok");
612 assert!(
614 model.std_dev[0] > 1.0,
615 "std_dev should be > 1 for large-variance feature"
616 );
617 }
618
619 #[test]
622 fn test_transform_output_shape() {
623 let data = axis_aligned_2d();
624 let config = PcaConfig {
625 n_components: 1,
626 center: true,
627 scale: false,
628 };
629 let model = PcaReducer::fit(&data, &config).expect("fit ok");
630 let reduced = PcaReducer::transform(&model, &data).expect("transform ok");
631 assert_eq!(reduced.len(), data.len());
632 assert_eq!(reduced[0].len(), 1);
633 }
634
635 #[test]
636 fn test_transform_two_components() {
637 let data = data_3d();
638 let config = PcaConfig {
639 n_components: 2,
640 center: true,
641 scale: false,
642 };
643 let model = PcaReducer::fit(&data, &config).expect("fit ok");
644 let reduced = PcaReducer::transform(&model, &data).expect("transform ok");
645 for row in &reduced {
646 assert_eq!(row.len(), 2);
647 }
648 }
649
650 #[test]
653 fn test_fit_transform_consistent_with_separate_calls() {
654 let data = axis_aligned_2d();
655 let config = PcaConfig::default();
656 let (model, reduced_combined) = PcaReducer::fit_transform(&data, &config).expect("ok");
657 let reduced_separate = PcaReducer::transform(&model, &data).expect("ok");
658 for (a, b) in reduced_combined.iter().zip(reduced_separate.iter()) {
659 for (x, y) in a.iter().zip(b.iter()) {
660 assert!((x - y).abs() < 1e-12);
661 }
662 }
663 }
664
665 #[test]
668 fn test_inverse_transform_output_shape() {
669 let data = axis_aligned_2d();
670 let config = PcaConfig::default();
671 let (model, reduced) = PcaReducer::fit_transform(&data, &config).expect("ok");
672 let reconstructed = PcaReducer::inverse_transform(&model, &reduced).expect("ok");
673 assert_eq!(reconstructed.len(), data.len());
674 assert_eq!(reconstructed[0].len(), 2);
675 }
676
677 #[test]
678 fn test_inverse_transform_approximation() {
679 let data = axis_aligned_2d();
681 let config = PcaConfig {
682 n_components: 2,
683 center: true,
684 scale: false,
685 };
686 let (model, reduced) = PcaReducer::fit_transform(&data, &config).expect("ok");
687 let reconstructed = PcaReducer::inverse_transform(&model, &reduced).expect("ok");
688 for (orig, rec) in data.iter().zip(reconstructed.iter()) {
689 for (a, b) in orig.iter().zip(rec.iter()) {
690 assert!(
692 (a - b).abs() < 0.05,
693 "reconstruction error too large: {a} vs {b}"
694 );
695 }
696 }
697 }
698
699 #[test]
702 fn test_explained_variance_ratio_all_nonneg() {
703 let data = data_3d();
704 let config = PcaConfig {
705 n_components: 2,
706 center: true,
707 scale: false,
708 };
709 let model = PcaReducer::fit(&data, &config).expect("ok");
710 for &r in PcaReducer::explained_variance_ratio(&model).iter() {
711 assert!(r >= 0.0);
712 }
713 }
714
715 #[test]
716 fn test_cumulative_explained_variance_monotone() {
717 let data = data_3d();
718 let config = PcaConfig {
719 n_components: 2,
720 center: true,
721 scale: false,
722 };
723 let model = PcaReducer::fit(&data, &config).expect("ok");
724 let cum = PcaReducer::cumulative_explained_variance(&model);
725 assert_eq!(cum.len(), 2);
726 assert!(cum[0] <= cum[1] + 1e-12);
727 }
728
729 #[test]
730 fn test_cumulative_explained_variance_last_is_one() {
731 let data = axis_aligned_2d();
732 let config = PcaConfig {
733 n_components: 2,
734 center: true,
735 scale: false,
736 };
737 let model = PcaReducer::fit(&data, &config).expect("ok");
738 let cum = PcaReducer::cumulative_explained_variance(&model);
739 assert!((cum.last().copied().unwrap_or(0.0) - 1.0).abs() < 1e-9);
740 }
741
742 #[test]
745 fn test_dot_product() {
746 assert!((dot(&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]) - 32.0).abs() < 1e-12);
747 }
748
749 #[test]
750 fn test_norm() {
751 assert!((norm(&[3.0, 4.0]) - 5.0).abs() < 1e-12);
752 }
753
754 #[test]
755 fn test_normalize() {
756 let mut v = vec![3.0, 4.0];
757 normalize(&mut v);
758 assert!((norm(&v) - 1.0).abs() < 1e-12);
759 }
760
761 #[test]
762 fn test_normalize_zero_vector_unchanged() {
763 let mut v = vec![0.0, 0.0];
764 normalize(&mut v);
765 assert_eq!(v, vec![0.0, 0.0]);
766 }
767
768 #[test]
771 fn test_pca_reducer_new() {
772 let _r = PcaReducer::new();
773 }
774
775 #[test]
778 fn test_pca_config_default() {
779 let c = PcaConfig::default();
780 assert_eq!(c.n_components, 2);
781 assert!(c.center);
782 assert!(!c.scale);
783 }
784
785 #[test]
788 fn test_fit_transform_with_scale() {
789 let data = vec![
790 vec![100.0, 0.001],
791 vec![-100.0, 0.002],
792 vec![200.0, -0.001],
793 vec![-200.0, -0.002],
794 vec![50.0, 0.0005],
795 vec![-50.0, -0.0005],
796 ];
797 let config = PcaConfig {
798 n_components: 1,
799 center: true,
800 scale: true,
801 };
802 let result = PcaReducer::fit_transform(&data, &config);
803 assert!(result.is_ok(), "fit_transform with scale should succeed");
804 }
805
806 #[test]
809 fn test_components_orthogonal() {
810 let data = data_3d();
811 let config = PcaConfig {
812 n_components: 2,
813 center: true,
814 scale: false,
815 };
816 let model = PcaReducer::fit(&data, &config).expect("ok");
817 let d01 = dot(&model.components[0], &model.components[1]).abs();
818 assert!(d01 < 1e-6, "components should be orthogonal, got dot={d01}");
819 }
820
821 #[test]
822 fn test_model_clone() {
823 let data = axis_aligned_2d();
824 let config = PcaConfig::default();
825 let model = PcaReducer::fit(&data, &config).expect("ok");
826 let model2 = model.clone();
827 assert_eq!(model.n_components, model2.n_components);
828 }
829}