Struct linfa_clustering::GaussianMixtureModel

pub struct GaussianMixtureModel<F: Float> { /* fields omitted */ }

A Gaussian Mixture Model (GMM) clusters a dataset by modeling it as a collection of normally distributed sub-populations (hence the name Gaussian Mixture).

GMM assumes that all data points are generated from a mixture of K Gaussian distributions with unknown parameters. The expectation-maximization (EM) algorithm is used to fit the GMM to the dataset by estimating the weight, mean, and covariance of each cluster distribution.

This implementation is a port of the scikit-learn 0.23.2 Gaussian Mixture implementation.

The algorithm

The general idea is to maximize the likelihood (equivalently, the log-likelihood), that is, to maximize the probability that the dataset is drawn from our mixture of normal distributions.
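In symbols, writing π_k, μ_k and Σ_k for the weight, mean and covariance of component k (standard GMM notation, not identifiers from this crate), the quantity being maximized over observations x_1, ..., x_N is the log-likelihood

\log p(X) = \sum_{i=1}^{N} \log \sum_{k=1}^{K} \pi_k \, \mathcal{N}(x_i \mid \mu_k, \Sigma_k)

where \mathcal{N}(\cdot \mid \mu_k, \Sigma_k) is the multivariate normal density of component k.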

After an initialization step, which can either draw the parameters from a random distribution or take them from the result of the KMeans algorithm (the default value of the init_method parameter), the core EM iterative algorithm for Gaussian Mixture is a fixed-point two-step algorithm:

  1. Expectation step: compute the expectation of the likelihood of the current Gaussian mixture model with respect to the dataset.
  2. Maximization step: update the Gaussian parameters (weights, means and covariances) to maximize the likelihood.

We stop iterating when there is no significant change in the Gaussian parameters (controlled by the tolerance parameter) or when we reach the maximum number of iterations (controlled by the max_n_iterations parameter). As the initialization of the algorithm is subject to randomness, several runs are performed (controlled by the n_runs parameter).
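To make the two steps concrete, below is a minimal, self-contained sketch of one EM update for a one-dimensional mixture of K Gaussians, together with the stopping rule just described. It is an illustration only, not linfa's internal code: em_step, normal_pdf and the flat responsibility layout are hypothetical choices made for brevity.

use std::f64::consts::PI;

// Density of a 1-D normal distribution with the given mean and variance.
fn normal_pdf(x: f64, mean: f64, var: f64) -> f64 {
    let d = x - mean;
    (-d * d / (2.0 * var)).exp() / (2.0 * PI * var).sqrt()
}

// One EM update over `data`; returns the log-likelihood of the parameters
// as they were at entry (i.e. before the maximization step).
fn em_step(data: &[f64], weights: &mut [f64], means: &mut [f64], vars: &mut [f64]) -> f64 {
    let (n, k) = (data.len(), weights.len());
    let mut resp = vec![0.0; n * k]; // responsibility of component j for point i
    let mut log_likelihood = 0.0;

    // Expectation step: how likely is each point under each weighted component?
    for (i, &x) in data.iter().enumerate() {
        let mut total = 0.0;
        for j in 0..k {
            resp[i * k + j] = weights[j] * normal_pdf(x, means[j], vars[j]);
            total += resp[i * k + j];
        }
        for j in 0..k {
            resp[i * k + j] /= total; // normalize into posterior probabilities
        }
        log_likelihood += total.ln();
    }

    // Maximization step: re-estimate each component from its responsibilities.
    for j in 0..k {
        let nk: f64 = (0..n).map(|i| resp[i * k + j]).sum();
        weights[j] = nk / n as f64;
        means[j] = (0..n).map(|i| resp[i * k + j] * data[i]).sum::<f64>() / nk;
        let mean_j = means[j];
        vars[j] = (0..n)
            .map(|i| resp[i * k + j] * (data[i] - mean_j).powi(2))
            .sum::<f64>() / nk;
    }
    log_likelihood
}

fn main() {
    let data = [0.1, -0.2, 0.05, 4.9, 5.2, 5.1];
    let (mut w, mut m, mut v) = (vec![0.5, 0.5], vec![0.0, 1.0], vec![1.0, 1.0]);
    let (tolerance, max_n_iterations) = (1e-4, 100);
    let mut previous = f64::NEG_INFINITY;
    for _ in 0..max_n_iterations {
        let ll = em_step(&data, &mut w, &mut m, &mut v);
        if (ll - previous).abs() < tolerance {
            break; // the likelihood (and hence the parameters) stopped changing significantly
        }
        previous = ll;
    }
    println!("weights {:?}, means {:?}, variances {:?}", w, m, v);
}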

Tutorial

Let's do a walkthrough of a training and prediction example.

use linfa::DatasetBase;
use linfa::traits::{Fit, Predict};
use linfa_clustering::{GmmHyperParams, GaussianMixtureModel, generate_blobs};
use ndarray::{Axis, array, s, Zip};
use ndarray_rand::rand::SeedableRng;
use rand_isaac::Isaac64Rng;
use approx::assert_abs_diff_eq;

let mut rng = Isaac64Rng::seed_from_u64(42);
let expected_centroids = array![[0., 1.], [-10., 20.], [-1., 10.]];
let n = 200;

// We generate a dataset from points normally distributed around some distant centroids.  
let dataset = DatasetBase::from(generate_blobs(n, &expected_centroids, &mut rng));

// Our GMM is expected to have a number of clusters equal to the number of centroids
// used to generate the dataset
let n_clusters = expected_centroids.len_of(Axis(0));

// We fit the model from the dataset setting some options
let gmm = GaussianMixtureModel::params(n_clusters)
            .with_n_runs(10)
            .with_tolerance(1e-4)
            .with_rng(rng)
            .fit(&dataset).expect("GMM fitting");

// Then we can get the dataset membership information; the targets contain the **cluster indexes**
// corresponding to the entries in the lists of GMM means and covariances
let blobs_dataset = gmm.predict(dataset);
let DatasetBase {
    records: _blobs_records,
    targets: blobs_targets,
    ..
} = blobs_dataset;
println!("GMM means = {:?}", gmm.means());
println!("GMM covariances = {:?}", gmm.covariances());
println!("GMM membership = {:?}", blobs_targets);

// We can also get the nearest cluster for a new point
let new_observation = DatasetBase::from(array![[-9., 20.5]]);
// Predict returns the **index** of the nearest cluster
let dataset = gmm.predict(new_observation);
// We can retrieve the actual centroid of the closest cluster using `.centroids()` (an alias of `.means()`)
let closest_centroid = &gmm.centroids().index_axis(Axis(0), dataset.targets()[0]);

Implementations

impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F>

pub fn params(n_clusters: usize) -> GmmHyperParams<F, Isaac64Rng>

pub fn weights(&self) -> &Array1<F>

pub fn means(&self) -> &Array2<F>

pub fn covariances(&self) -> &Array3<F>

pub fn precisions(&self) -> &Array3<F>

pub fn centroids(&self) -> &Array2<F>
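All of these getters borrow the fitted mixture parameters: weights() returns the mixing weights (an Array1 that sums to one), means() the cluster centers (one row per cluster), and covariances() and precisions() the per-cluster covariance matrices and their inverses. For instance, reusing the gmm fitted in the tutorial above:

println!("mixing weights = {:?}", gmm.weights());
println!("precisions (inverse covariances) = {:?}", gmm.precisions());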

Trait Implementations

impl<F: Float> Clone for GaussianMixtureModel<F>

impl<F: Debug + Float> Debug for GaussianMixtureModel<F>

impl<F: PartialEq + Float> PartialEq<GaussianMixtureModel<F>> for GaussianMixtureModel<F>

impl<F: Float + Lapack + Scalar, D: Data<Elem = F>> Predict<&'_ ArrayBase<D, Dim<[usize; 2]>>, ArrayBase<OwnedRepr<usize>, Dim<[usize; 1]>>> for GaussianMixtureModel<F>

impl<F: Float + Lapack + Scalar, D: Data<Elem = F>, T: Targets> Predict<DatasetBase<ArrayBase<D, Dim<[usize; 2]>>, T>, DatasetBase<ArrayBase<D, Dim<[usize; 2]>>, ArrayBase<OwnedRepr<usize>, Dim<[usize; 1]>>>> for GaussianMixtureModel<F>
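The two Predict impls above differ only in shape: predicting on a DatasetBase returns a DatasetBase whose targets hold the cluster indexes (as in the tutorial), while predicting on a borrowed 2-D array returns the indexes directly as an Array1<usize>. A minimal sketch of the array form, reusing the tutorial's fitted gmm:

use ndarray::array;

// One cluster index per row of the input observations.
let memberships = gmm.predict(&array![[0., 1.], [-10., 20.]]);
assert_eq!(memberships.len(), 2);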

impl<F: Float> StructuralPartialEq for GaussianMixtureModel<F>

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized

impl<T> Borrow<T> for T where
    T: ?Sized

impl<T> BorrowMut<T> for T where
    T: ?Sized

impl<T> From<T> for T

impl<T, U> Into<U> for T where
    U: From<T>,

impl<T> Pointable for T

type Init = T

The type for initializers.

impl<SS, SP> SupersetOf<SS> for SP where
    SS: SubsetOf<SP>, 

impl<T> ToOwned for T where
    T: Clone

type Owned = T

The resulting type after obtaining ownership.

impl<T, U> TryFrom<U> for T where
    U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

impl<V, T> VZip<V> for T where
    V: MultiLane<T>,