1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
//! This crate implements a _Probabilistic Principal Component Analysis_ in pure Rust, based on a
//! legacy Python version, which was just too slow for the job (even with `numpy` "C code" to spice it up!)
//!
//! If you want all the comfort that the Python ecosystem can provide, you are welcome to check out the
//! Python wrapper in [PyPI](https://pypi.org/project/ppca-rs/0.3.1/). However, if you want top performance
//! and great debugging support, this crate is for you.
//!
//! To get to know more about the PPCA algorithm for missing data, please check the references below:
//! * <https://www.robots.ox.ac.uk/~cvrg/hilary2006/ppca.pdf>: first result on Google.
//! * <http://perception.inrialpes.fr/people/Horaud/Courses/pdf/Horaud-MLSVP9.pdf>: lecture slides, also
//!   covering PPCA mixtures.
//! * <https://www.youtube.com/watch?v=lJ0cXPoEozg>: video on YouTube explaining the topic.

// mod dataframe_adapter;
mod dataset;
mod mix;
mod output_covariance;
mod ppca_model;
mod prior;
mod utils;

pub use dataset::{Dataset, MaskedSample};
pub use mix::{InferredMaskedMix, PPCAMix, PosteriorSamplerMix};
pub use ppca_model::{InferredMasked, PPCAModel, PosteriorSampler};
pub use prior::Prior;

#[cfg(test)]
mod test {
    use super::*;

    use nalgebra::{dmatrix, dvector, DMatrix, DVector};
    use ppca_model::PPCAModel;
    use rand_distr::{Bernoulli, Distribution};

    /// Builds a tiny ground-truth model: 3 observed dimensions, 2 latent
    /// dimensions, isotropic noise variance of 0.1 and a fixed mean.
    fn toy_model() -> PPCAModel {
        PPCAModel::new(
            0.1,
            dmatrix![
                1.0, 1.0, 0.0;
                1.0, 0.0, 1.0;
            ]
            .transpose(),
            dvector![0.0, 1.0, 0.0],
        )
    }

    /// Trains a freshly initialized model on data sampled from `toy_model`
    /// and asserts that EM training never made the fit worse: each EM step
    /// is guaranteed to be non-decreasing in log-likelihood.
    #[test]
    fn test_toy_model() {
        let real_model = toy_model();
        // 1_000 samples, each value masked with probability 0.2.
        let sample = real_model.sample(1_000, 0.2);
        let mut model = PPCAModel::init(2, &sample);

        let initial_llk = model.llk(&sample);

        for iter in 0..1600 {
            println!(
                "At iteration {} model aic is {}",
                iter + 1,
                // AIC per sample: 2 * (n_parameters - llk) / n. Lower is better.
                2.0 * (model.n_parameters() as f64 - model.llk(&sample)) / sample.len() as f64
            );
            model = model.iterate(&sample);
        }

        // EM is monotone in log-likelihood; allow a tiny numerical slack.
        assert!(
            model.llk(&sample) >= initial_llk - 1e-6,
            "log-likelihood decreased during EM training"
        );
    }

    /// Builds a larger random ground-truth model: 200 observed dimensions,
    /// 16 latent dimensions, sparse 0/1 transformation matrix.
    fn big_toy_model() -> PPCAModel {
        /// Random `rows x cols` matrix whose entries are 1.0 with
        /// probability 0.1 and 0.0 otherwise.
        fn standard_noise_matrix(rows: usize, cols: usize) -> DMatrix<f64> {
            DMatrix::from_vec(
                rows,
                cols,
                Bernoulli::new(0.1)
                    .unwrap()
                    .sample_iter(rand::thread_rng())
                    .take(rows * cols)
                    .map(|selected| if selected { 1.0 } else { 0.0 })
                    .collect(),
            )
        }

        PPCAModel::new(0.1, standard_noise_matrix(200, 16), DVector::zeros(200))
    }

    /// Same as `test_toy_model`, but at a scale where performance matters:
    /// 100_000 samples of 200 dimensions, 24 EM iterations.
    #[test]
    fn test_big_toy_model() {
        let real_model = big_toy_model();
        // 100_000 samples, each value masked with probability 0.2.
        let sample = real_model.sample(100_000, 0.2);
        let mut model = PPCAModel::init(16, &sample);

        let initial_llk = model.llk(&sample);

        for iter in 0..24 {
            println!(
                "At iteration {} model aic is {}",
                iter + 1,
                // AIC per sample: 2 * (n_parameters - llk) / n. Lower is better.
                2.0 * (model.n_parameters() as f64 - model.llk(&sample)) / sample.len() as f64
            );
            model = model.iterate(&sample);
        }

        // Also exercise canonicalization on the trained model.
        model.to_canonical();

        // EM is monotone in log-likelihood; allow a tiny numerical slack.
        assert!(
            model.llk(&sample) >= initial_llk - 1e-6,
            "log-likelihood decreased during EM training"
        );
    }
}