oxirs_embed/
pca_reducer.rs

1//! # PCA Reducer
2//!
3//! Principal Component Analysis (PCA) dimensionality reduction for embedding vectors,
4//! implemented from scratch without external linear algebra libraries.
5//!
6//! ## Algorithm
7//!
8//! 1. Optionally center (subtract column means) and scale (divide by std-dev).
9//! 2. Compute the covariance matrix.
10//! 3. Extract the top-k eigenvectors using power iteration with deflation.
11//! 4. Orthogonalise via Gram-Schmidt.
12//! 5. Project data onto the component subspace.
13
14use std::fmt;
15
16// ─────────────────────────────────────────────────────────────────────────────
17// Errors
18// ─────────────────────────────────────────────────────────────────────────────
19
20/// Errors arising from PCA operations.
21#[derive(Debug, Clone, PartialEq)]
22pub enum PcaError {
23    /// Not enough samples or features to perform PCA.
24    InsufficientData { samples: usize, features: usize },
25    /// Requested more components than the dimensionality allows.
26    InvalidComponents { requested: usize, max: usize },
27    /// Input dimensionality does not match the fitted model.
28    DimensionMismatch,
29}
30
31impl fmt::Display for PcaError {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        match self {
34            PcaError::InsufficientData { samples, features } => write!(
35                f,
36                "insufficient data: {samples} samples and {features} features (need at least 2 of each)"
37            ),
38            PcaError::InvalidComponents { requested, max } => write!(
39                f,
40                "requested {requested} components but max is {max}"
41            ),
42            PcaError::DimensionMismatch => write!(f, "input dimension does not match the fitted model"),
43        }
44    }
45}
46
47impl std::error::Error for PcaError {}
48
49// ─────────────────────────────────────────────────────────────────────────────
50// Configuration
51// ─────────────────────────────────────────────────────────────────────────────
52
53/// Configuration for a PCA fitting operation.
54#[derive(Debug, Clone)]
55pub struct PcaConfig {
56    /// Number of principal components to extract.
57    pub n_components: usize,
58    /// If `true`, subtract the per-feature mean before computing components.
59    pub center: bool,
60    /// If `true`, divide by the per-feature standard deviation after centering.
61    pub scale: bool,
62}
63
64impl Default for PcaConfig {
65    fn default() -> Self {
66        Self {
67            n_components: 2,
68            center: true,
69            scale: false,
70        }
71    }
72}
73
74// ─────────────────────────────────────────────────────────────────────────────
75// Model
76// ─────────────────────────────────────────────────────────────────────────────
77
78/// A fitted PCA model that can be used for `transform` and `inverse_transform`.
79#[derive(Debug, Clone)]
80pub struct PcaModel {
81    /// Per-feature mean used for centering (zero if `center = false`).
82    pub mean: Vec<f64>,
83    /// Per-feature standard deviation used for scaling (one if `scale = false`).
84    pub std_dev: Vec<f64>,
85    /// Principal components as row vectors, each of length `n_features`.
86    pub components: Vec<Vec<f64>>,
87    /// Explained variance (eigenvalue) associated with each component.
88    pub explained_variance: Vec<f64>,
89    /// Number of components extracted.
90    pub n_components: usize,
91    /// Number of features in the training data.
92    pub n_features: usize,
93}
94
95// ─────────────────────────────────────────────────────────────────────────────
96// Math helpers
97// ─────────────────────────────────────────────────────────────────────────────
98
99/// Dot product of two equal-length slices.
100pub fn dot(a: &[f64], b: &[f64]) -> f64 {
101    a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
102}
103
104/// Euclidean norm of a vector.
105pub fn norm(v: &[f64]) -> f64 {
106    dot(v, v).sqrt()
107}
108
109/// Normalise `v` in-place; if the norm is (near) zero, the vector is left unchanged.
110pub fn normalize(v: &mut [f64]) {
111    let n = norm(v);
112    if n > 1e-15 {
113        for x in v.iter_mut() {
114            *x /= n;
115        }
116    }
117}
118
119// ─────────────────────────────────────────────────────────────────────────────
120// PcaReducer
121// ─────────────────────────────────────────────────────────────────────────────
122
123/// Provides `fit`, `transform`, `fit_transform`, and `inverse_transform` for PCA.
124#[derive(Debug, Default)]
125pub struct PcaReducer;
126
127impl PcaReducer {
128    /// Create a new reducer.
129    pub fn new() -> Self {
130        Self
131    }
132
133    /// Fit a `PcaModel` on `data`.
134    ///
135    /// Each row of `data` is a sample; each column is a feature.
136    pub fn fit(data: &[Vec<f64>], config: &PcaConfig) -> Result<PcaModel, PcaError> {
137        let n = data.len();
138        let d = data.first().map(|r| r.len()).unwrap_or(0);
139
140        if n < 2 || d < 2 {
141            return Err(PcaError::InsufficientData {
142                samples: n,
143                features: d,
144            });
145        }
146        let k = config.n_components;
147        if k == 0 || k > d {
148            return Err(PcaError::InvalidComponents {
149                requested: k,
150                max: d,
151            });
152        }
153
154        // 1. Compute per-feature mean
155        let mut mean = vec![0.0f64; d];
156        for row in data {
157            for (j, &v) in row.iter().enumerate() {
158                mean[j] += v;
159            }
160        }
161        for m in &mut mean {
162            *m /= n as f64;
163        }
164
165        // 2. Compute per-feature std-dev
166        let mut std_dev = vec![1.0f64; d];
167        if config.scale {
168            let mut variance = vec![0.0f64; d];
169            for row in data {
170                for (j, &v) in row.iter().enumerate() {
171                    let diff = v - mean[j];
172                    variance[j] += diff * diff;
173                }
174            }
175            for (j, s) in std_dev.iter_mut().enumerate() {
176                let var = variance[j] / (n as f64 - 1.0).max(1.0);
177                *s = if var > 1e-15 { var.sqrt() } else { 1.0 };
178            }
179        }
180
181        // 3. Center / scale data
182        let apply_mean = if config.center {
183            &mean[..]
184        } else {
185            &vec![0.0f64; d][..]
186        };
187        let centered: Vec<Vec<f64>> = data
188            .iter()
189            .map(|row| {
190                row.iter()
191                    .enumerate()
192                    .map(|(j, &v)| (v - apply_mean[j]) / std_dev[j])
193                    .collect()
194            })
195            .collect();
196
197        // 4. Compute covariance matrix  C[i][j] = (1/(n-1)) * Σ x_k[i] * x_k[j]
198        let denom = (n as f64 - 1.0).max(1.0);
199        let mut cov = vec![vec![0.0f64; d]; d];
200        for row in &centered {
201            #[allow(clippy::needless_range_loop)]
202            for i in 0..d {
203                for j in 0..d {
204                    cov[i][j] += row[i] * row[j];
205                }
206            }
207        }
208        #[allow(clippy::needless_range_loop)]
209        for i in 0..d {
210            for j in 0..d {
211                cov[i][j] /= denom;
212            }
213        }
214
215        // 5. Power iteration with deflation for top-k eigenvectors
216        let mut components: Vec<Vec<f64>> = Vec::with_capacity(k);
217        let mut eigenvalues: Vec<f64> = Vec::with_capacity(k);
218        // Deflated covariance matrix (starts as a copy of cov)
219        let mut deflated = cov.clone();
220
221        for comp_idx in 0..k {
222            // Initialise eigenvector estimate with the column that has maximum variance
223            let mut ev: Vec<f64> = (0..d).map(|j| deflated[j][j].abs()).collect();
224            normalize(&mut ev);
225            // Fallback: if all-zero, use a standard basis vector
226            if norm(&ev) < 1e-15 {
227                ev = vec![0.0; d];
228                if comp_idx < d {
229                    ev[comp_idx] = 1.0;
230                }
231            }
232
233            // 50 power iterations
234            for _ in 0..50 {
235                // new_ev = deflated * ev
236                let mut new_ev = vec![0.0f64; d];
237                for i in 0..d {
238                    for j in 0..d {
239                        new_ev[i] += deflated[i][j] * ev[j];
240                    }
241                }
242                normalize(&mut new_ev);
243                ev = new_ev;
244            }
245
246            // Eigenvalue: Rayleigh quotient λ = ev' * deflated * ev
247            let mut av = vec![0.0f64; d];
248            for i in 0..d {
249                for j in 0..d {
250                    av[i] += deflated[i][j] * ev[j];
251                }
252            }
253            let lambda = dot(&ev, &av);
254
255            // Gram-Schmidt orthogonalise against all previous components
256            for prev in &components {
257                let proj = dot(&ev, prev);
258                for j in 0..d {
259                    ev[j] -= proj * prev[j];
260                }
261            }
262            normalize(&mut ev);
263
264            eigenvalues.push(lambda.max(0.0));
265            components.push(ev.clone());
266
267            // Deflation: subtract the rank-1 contribution of this eigenvector
268            for i in 0..d {
269                for j in 0..d {
270                    deflated[i][j] -= lambda * ev[i] * ev[j];
271                }
272            }
273        }
274
275        // 6. Explained variance: already stored as eigenvalues (≥ 0)
276        let total_var: f64 = eigenvalues.iter().sum();
277        let explained_variance_ratio: Vec<f64> = if total_var > 1e-15 {
278            eigenvalues.iter().map(|&e| e / total_var).collect()
279        } else {
280            vec![1.0 / k as f64; k]
281        };
282
283        Ok(PcaModel {
284            mean,
285            std_dev,
286            components,
287            explained_variance: explained_variance_ratio,
288            n_components: k,
289            n_features: d,
290        })
291    }
292
293    /// Project `data` onto the components of a fitted `model`.
294    pub fn transform(model: &PcaModel, data: &[Vec<f64>]) -> Result<Vec<Vec<f64>>, PcaError> {
295        let d = model.n_features;
296        let k = model.n_components;
297        let mut out = Vec::with_capacity(data.len());
298        for row in data {
299            if row.len() != d {
300                return Err(PcaError::DimensionMismatch);
301            }
302            // Center / scale
303            let centered: Vec<f64> = row
304                .iter()
305                .enumerate()
306                .map(|(j, &v)| (v - model.mean[j]) / model.std_dev[j])
307                .collect();
308            // Project onto each component
309            let projected: Vec<f64> = (0..k)
310                .map(|c| dot(&centered, &model.components[c]))
311                .collect();
312            out.push(projected);
313        }
314        Ok(out)
315    }
316
317    /// Fit a model and immediately transform `data`.
318    pub fn fit_transform(
319        data: &[Vec<f64>],
320        config: &PcaConfig,
321    ) -> Result<(PcaModel, Vec<Vec<f64>>), PcaError> {
322        let model = Self::fit(data, config)?;
323        let reduced = Self::transform(&model, data)?;
324        Ok((model, reduced))
325    }
326
327    /// Reconstruct approximate original-space vectors from reduced-space vectors.
328    pub fn inverse_transform(
329        model: &PcaModel,
330        reduced: &[Vec<f64>],
331    ) -> Result<Vec<Vec<f64>>, PcaError> {
332        let k = model.n_components;
333        let d = model.n_features;
334        let mut out = Vec::with_capacity(reduced.len());
335        for row in reduced {
336            if row.len() != k {
337                return Err(PcaError::DimensionMismatch);
338            }
339            // Reconstruct in centred/scaled space: x ≈ Σ row[c] * component[c]
340            let mut rec = vec![0.0f64; d];
341            for (c, &coeff) in row.iter().enumerate() {
342                #[allow(clippy::needless_range_loop)]
343                for j in 0..d {
344                    rec[j] += coeff * model.components[c][j];
345                }
346            }
347            // Un-scale then un-center
348            #[allow(clippy::needless_range_loop)]
349            for j in 0..d {
350                rec[j] = rec[j] * model.std_dev[j] + model.mean[j];
351            }
352            out.push(rec);
353        }
354        Ok(out)
355    }
356
357    /// Return the fraction of total variance explained by each component.
358    pub fn explained_variance_ratio(model: &PcaModel) -> Vec<f64> {
359        model.explained_variance.clone()
360    }
361
362    /// Return the cumulative explained variance for each component.
363    pub fn cumulative_explained_variance(model: &PcaModel) -> Vec<f64> {
364        let mut cum = 0.0;
365        model
366            .explained_variance
367            .iter()
368            .map(|&v| {
369                cum += v;
370                cum
371            })
372            .collect()
373    }
374}
375
376// ─────────────────────────────────────────────────────────────────────────────
377// Tests
378// ─────────────────────────────────────────────────────────────────────────────
379
380#[cfg(test)]
381mod tests {
382    use super::*;
383
384    /// Create a simple 2-D dataset with variance along the first axis.
385    fn axis_aligned_2d() -> Vec<Vec<f64>> {
386        vec![
387            vec![3.0, 0.0],
388            vec![-3.0, 0.0],
389            vec![3.0, 0.1],
390            vec![-3.0, -0.1],
391            vec![3.0, 0.05],
392            vec![-3.0, -0.05],
393        ]
394    }
395
396    /// 3-D dataset where variance is dominant along the first two axes.
397    fn data_3d() -> Vec<Vec<f64>> {
398        vec![
399            vec![1.0, 2.0, 0.01],
400            vec![-1.0, -2.0, 0.01],
401            vec![2.0, 4.0, -0.01],
402            vec![-2.0, -4.0, -0.01],
403            vec![0.5, 1.0, 0.005],
404            vec![-0.5, -1.0, -0.005],
405        ]
406    }
407
408    // ── Error cases ──────────────────────────────────────────────────────────
409
410    #[test]
411    fn test_fit_insufficient_samples() {
412        let data = vec![vec![1.0, 2.0]];
413        let config = PcaConfig {
414            n_components: 1,
415            center: true,
416            scale: false,
417        };
418        assert!(matches!(
419            PcaReducer::fit(&data, &config),
420            Err(PcaError::InsufficientData { .. })
421        ));
422    }
423
424    #[test]
425    fn test_fit_insufficient_features() {
426        let data = vec![vec![1.0], vec![2.0], vec![3.0]];
427        let config = PcaConfig {
428            n_components: 1,
429            center: true,
430            scale: false,
431        };
432        assert!(matches!(
433            PcaReducer::fit(&data, &config),
434            Err(PcaError::InsufficientData { .. })
435        ));
436    }
437
438    #[test]
439    fn test_fit_too_many_components() {
440        let data = axis_aligned_2d();
441        let config = PcaConfig {
442            n_components: 5,
443            center: true,
444            scale: false,
445        };
446        assert!(matches!(
447            PcaReducer::fit(&data, &config),
448            Err(PcaError::InvalidComponents { .. })
449        ));
450    }
451
452    #[test]
453    fn test_fit_zero_components() {
454        let data = axis_aligned_2d();
455        let config = PcaConfig {
456            n_components: 0,
457            center: true,
458            scale: false,
459        };
460        assert!(matches!(
461            PcaReducer::fit(&data, &config),
462            Err(PcaError::InvalidComponents { .. })
463        ));
464    }
465
466    #[test]
467    fn test_transform_dimension_mismatch() {
468        let data = axis_aligned_2d();
469        let config = PcaConfig::default();
470        let model = PcaReducer::fit(&data, &config).expect("fit ok");
471        let bad = vec![vec![1.0, 2.0, 3.0]]; // 3 features, model expects 2
472        assert_eq!(
473            PcaReducer::transform(&model, &bad),
474            Err(PcaError::DimensionMismatch)
475        );
476    }
477
478    #[test]
479    fn test_inverse_transform_dimension_mismatch() {
480        let data = axis_aligned_2d();
481        let config = PcaConfig::default();
482        let model = PcaReducer::fit(&data, &config).expect("fit ok");
483        let bad = vec![vec![1.0, 2.0, 3.0]]; // 3 comps, model has 2
484        assert_eq!(
485            PcaReducer::inverse_transform(&model, &bad),
486            Err(PcaError::DimensionMismatch)
487        );
488    }
489
490    // ── PcaError display ─────────────────────────────────────────────────────
491
492    #[test]
493    fn test_error_display_insufficient() {
494        let e = PcaError::InsufficientData {
495            samples: 1,
496            features: 1,
497        };
498        assert!(e.to_string().contains("insufficient"));
499    }
500
501    #[test]
502    fn test_error_display_invalid_components() {
503        let e = PcaError::InvalidComponents {
504            requested: 5,
505            max: 2,
506        };
507        assert!(e.to_string().contains("5"));
508    }
509
510    #[test]
511    fn test_error_display_dimension_mismatch() {
512        let e = PcaError::DimensionMismatch;
513        assert!(e.to_string().contains("dimension"));
514    }
515
516    #[test]
517    fn test_error_is_std_error() {
518        let e: Box<dyn std::error::Error> = Box::new(PcaError::DimensionMismatch);
519        assert!(!e.to_string().is_empty());
520    }
521
522    // ── Fit output properties ─────────────────────────────────────────────────
523
524    #[test]
525    fn test_fit_produces_model_with_correct_n_components() {
526        let data = axis_aligned_2d();
527        let config = PcaConfig {
528            n_components: 1,
529            center: true,
530            scale: false,
531        };
532        let model = PcaReducer::fit(&data, &config).expect("fit ok");
533        assert_eq!(model.n_components, 1);
534        assert_eq!(model.components.len(), 1);
535    }
536
537    #[test]
538    fn test_fit_component_is_unit_vector() {
539        let data = axis_aligned_2d();
540        let config = PcaConfig {
541            n_components: 1,
542            center: true,
543            scale: false,
544        };
545        let model = PcaReducer::fit(&data, &config).expect("fit ok");
546        let n = norm(&model.components[0]);
547        assert!((n - 1.0).abs() < 1e-9, "component should be a unit vector");
548    }
549
550    #[test]
551    fn test_fit_mean_equals_column_means() {
552        let data = axis_aligned_2d();
553        let config = PcaConfig {
554            n_components: 1,
555            center: true,
556            scale: false,
557        };
558        let model = PcaReducer::fit(&data, &config).expect("fit ok");
559        // All x-values sum to 0, all y-values sum to 0
560        assert!(model.mean[0].abs() < 1e-9);
561        assert!(model.mean[1].abs() < 1e-9);
562    }
563
564    #[test]
565    fn test_fit_n_features_stored() {
566        let data = axis_aligned_2d();
567        let config = PcaConfig::default();
568        let model = PcaReducer::fit(&data, &config).expect("fit ok");
569        assert_eq!(model.n_features, 2);
570    }
571
572    #[test]
573    fn test_fit_explained_variance_sums_to_one() {
574        let data = data_3d();
575        let config = PcaConfig {
576            n_components: 2,
577            center: true,
578            scale: false,
579        };
580        let model = PcaReducer::fit(&data, &config).expect("fit ok");
581        let sum: f64 = model.explained_variance.iter().sum();
582        // Sum should equal 1.0 (ratio is normalised against extracted components)
583        assert!(
584            (sum - 1.0).abs() < 1e-9,
585            "explained variance ratios should sum to 1.0"
586        );
587    }
588
589    #[test]
590    fn test_fit_std_dev_ones_when_no_scale() {
591        let data = axis_aligned_2d();
592        let config = PcaConfig {
593            n_components: 1,
594            center: true,
595            scale: false,
596        };
597        let model = PcaReducer::fit(&data, &config).expect("fit ok");
598        for s in &model.std_dev {
599            assert!((*s - 1.0).abs() < 1e-9);
600        }
601    }
602
603    #[test]
604    fn test_fit_scale_changes_std_dev() {
605        let data = axis_aligned_2d();
606        let config = PcaConfig {
607            n_components: 1,
608            center: true,
609            scale: true,
610        };
611        let model = PcaReducer::fit(&data, &config).expect("fit ok");
612        // std_dev for the x-axis should be ~3.0
613        assert!(
614            model.std_dev[0] > 1.0,
615            "std_dev should be > 1 for large-variance feature"
616        );
617    }
618
619    // ── Transform ────────────────────────────────────────────────────────────
620
621    #[test]
622    fn test_transform_output_shape() {
623        let data = axis_aligned_2d();
624        let config = PcaConfig {
625            n_components: 1,
626            center: true,
627            scale: false,
628        };
629        let model = PcaReducer::fit(&data, &config).expect("fit ok");
630        let reduced = PcaReducer::transform(&model, &data).expect("transform ok");
631        assert_eq!(reduced.len(), data.len());
632        assert_eq!(reduced[0].len(), 1);
633    }
634
635    #[test]
636    fn test_transform_two_components() {
637        let data = data_3d();
638        let config = PcaConfig {
639            n_components: 2,
640            center: true,
641            scale: false,
642        };
643        let model = PcaReducer::fit(&data, &config).expect("fit ok");
644        let reduced = PcaReducer::transform(&model, &data).expect("transform ok");
645        for row in &reduced {
646            assert_eq!(row.len(), 2);
647        }
648    }
649
650    // ── fit_transform ────────────────────────────────────────────────────────
651
652    #[test]
653    fn test_fit_transform_consistent_with_separate_calls() {
654        let data = axis_aligned_2d();
655        let config = PcaConfig::default();
656        let (model, reduced_combined) = PcaReducer::fit_transform(&data, &config).expect("ok");
657        let reduced_separate = PcaReducer::transform(&model, &data).expect("ok");
658        for (a, b) in reduced_combined.iter().zip(reduced_separate.iter()) {
659            for (x, y) in a.iter().zip(b.iter()) {
660                assert!((x - y).abs() < 1e-12);
661            }
662        }
663    }
664
665    // ── inverse_transform ────────────────────────────────────────────────────
666
667    #[test]
668    fn test_inverse_transform_output_shape() {
669        let data = axis_aligned_2d();
670        let config = PcaConfig::default();
671        let (model, reduced) = PcaReducer::fit_transform(&data, &config).expect("ok");
672        let reconstructed = PcaReducer::inverse_transform(&model, &reduced).expect("ok");
673        assert_eq!(reconstructed.len(), data.len());
674        assert_eq!(reconstructed[0].len(), 2);
675    }
676
677    #[test]
678    fn test_inverse_transform_approximation() {
679        // With k == d == 2 components, reconstruction should be exact (up to floating point).
680        let data = axis_aligned_2d();
681        let config = PcaConfig {
682            n_components: 2,
683            center: true,
684            scale: false,
685        };
686        let (model, reduced) = PcaReducer::fit_transform(&data, &config).expect("ok");
687        let reconstructed = PcaReducer::inverse_transform(&model, &reduced).expect("ok");
688        for (orig, rec) in data.iter().zip(reconstructed.iter()) {
689            for (a, b) in orig.iter().zip(rec.iter()) {
690                // Allow slightly larger tolerance due to floating-point accumulation
691                assert!(
692                    (a - b).abs() < 0.05,
693                    "reconstruction error too large: {a} vs {b}"
694                );
695            }
696        }
697    }
698
699    // ── explained_variance_ratio / cumulative ─────────────────────────────────
700
701    #[test]
702    fn test_explained_variance_ratio_all_nonneg() {
703        let data = data_3d();
704        let config = PcaConfig {
705            n_components: 2,
706            center: true,
707            scale: false,
708        };
709        let model = PcaReducer::fit(&data, &config).expect("ok");
710        for &r in PcaReducer::explained_variance_ratio(&model).iter() {
711            assert!(r >= 0.0);
712        }
713    }
714
715    #[test]
716    fn test_cumulative_explained_variance_monotone() {
717        let data = data_3d();
718        let config = PcaConfig {
719            n_components: 2,
720            center: true,
721            scale: false,
722        };
723        let model = PcaReducer::fit(&data, &config).expect("ok");
724        let cum = PcaReducer::cumulative_explained_variance(&model);
725        assert_eq!(cum.len(), 2);
726        assert!(cum[0] <= cum[1] + 1e-12);
727    }
728
729    #[test]
730    fn test_cumulative_explained_variance_last_is_one() {
731        let data = axis_aligned_2d();
732        let config = PcaConfig {
733            n_components: 2,
734            center: true,
735            scale: false,
736        };
737        let model = PcaReducer::fit(&data, &config).expect("ok");
738        let cum = PcaReducer::cumulative_explained_variance(&model);
739        assert!((cum.last().copied().unwrap_or(0.0) - 1.0).abs() < 1e-9);
740    }
741
742    // ── Math helpers ──────────────────────────────────────────────────────────
743
744    #[test]
745    fn test_dot_product() {
746        assert!((dot(&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]) - 32.0).abs() < 1e-12);
747    }
748
749    #[test]
750    fn test_norm() {
751        assert!((norm(&[3.0, 4.0]) - 5.0).abs() < 1e-12);
752    }
753
754    #[test]
755    fn test_normalize() {
756        let mut v = vec![3.0, 4.0];
757        normalize(&mut v);
758        assert!((norm(&v) - 1.0).abs() < 1e-12);
759    }
760
761    #[test]
762    fn test_normalize_zero_vector_unchanged() {
763        let mut v = vec![0.0, 0.0];
764        normalize(&mut v);
765        assert_eq!(v, vec![0.0, 0.0]);
766    }
767
768    // ── PcaReducer::new ───────────────────────────────────────────────────────
769
770    #[test]
771    fn test_pca_reducer_new() {
772        let _r = PcaReducer::new();
773    }
774
775    // ── PcaConfig default ─────────────────────────────────────────────────────
776
777    #[test]
778    fn test_pca_config_default() {
779        let c = PcaConfig::default();
780        assert_eq!(c.n_components, 2);
781        assert!(c.center);
782        assert!(!c.scale);
783    }
784
785    // ── Scale mode ────────────────────────────────────────────────────────────
786
787    #[test]
788    fn test_fit_transform_with_scale() {
789        let data = vec![
790            vec![100.0, 0.001],
791            vec![-100.0, 0.002],
792            vec![200.0, -0.001],
793            vec![-200.0, -0.002],
794            vec![50.0, 0.0005],
795            vec![-50.0, -0.0005],
796        ];
797        let config = PcaConfig {
798            n_components: 1,
799            center: true,
800            scale: true,
801        };
802        let result = PcaReducer::fit_transform(&data, &config);
803        assert!(result.is_ok(), "fit_transform with scale should succeed");
804    }
805
806    // ── Repeated fit_transform gives unit-length components ──────────────────
807
808    #[test]
809    fn test_components_orthogonal() {
810        let data = data_3d();
811        let config = PcaConfig {
812            n_components: 2,
813            center: true,
814            scale: false,
815        };
816        let model = PcaReducer::fit(&data, &config).expect("ok");
817        let d01 = dot(&model.components[0], &model.components[1]).abs();
818        assert!(d01 < 1e-6, "components should be orthogonal, got dot={d01}");
819    }
820
821    #[test]
822    fn test_model_clone() {
823        let data = axis_aligned_2d();
824        let config = PcaConfig::default();
825        let model = PcaReducer::fit(&data, &config).expect("ok");
826        let model2 = model.clone();
827        assert_eq!(model.n_components, model2.n_components);
828    }
829}
oxirs_embed/pca_reducer.rs

oxirs_embed/
pca_reducer.rs