// tinguely 0.1.1
//
// Machine learning library
// Documentation
//! K-Means
use mathru::algebra::linear::{Vector, Matrix};
use crate::model::UnsupervisedLearn;

/// Strategy used by [`KMeans`] to choose its initial centroids before training.
///
pub enum KMeansInitializer{
    // KMeansPluPlus: disabled variant (presumably k-means++ seeding); not implemented in this file.
    /// Dispatches to `KMeans::random_init_centroids`, which currently copies the
    /// first `k` rows of the training data as the starting centroids (no random
    /// sampling is performed there).
    Random,
}

/// K-Means clustering model.
///
/// Holds the configuration (number of centroids, number of update rounds,
/// initialization strategy) together with the centroids computed by `train`.
///
/// For more information: <br>
/// <a href="https://en.wikipedia.org/wiki/K-means_clustering">https://en.wikipedia.org/wiki/K-means_clustering</a>
///
/// # Examples
///
/// Note: every statement of the example below is commented out, so the
/// doctest currently exercises nothing.
///
/// ```
/// //use tinguely::{UnsupervisedLearn};
/// //use tinguely::clustering::{KMeans, KMeansInitializer};
/// //use mathru::algebra::linear::{Vector, Matrix};
///
/// //let training_data: Matrix<f64> = Matrix::new(3, 2, vec![1.0, 2.0, 3.0, 3.0, 1.0, 4.0]);
///
/// //let mut kmeans : KMeans = KMeans::new(3, 100, KMeansInitializer::Random);
/// //kmeans.train(&training_data);
///
/// //let test_data: Matrix<f64> = Matrix::new(1, 2, vec![2.0, 3.0]);
///
/// //let result: Vector<f64> = kmeans.predict(&test_data);
/// ```
pub struct KMeans
{
    /// Number of centroids (clusters).
    k: usize,
    /// Number of centroid update rounds performed by `train`.
    iter: usize,
    /// Current centroids, one per row; an empty 0x0 matrix until `train` has run.
    centroids: Matrix<f64>,
    /// Strategy used to pick the initial centroids at the start of `train`.
    initializer: KMeansInitializer
}

impl KMeans
{
    /// Creates a new k-means model
    ///
    /// # Arguments
    ///
    /// * 'k': Number of centroids
    /// * 'n': Number of iterations
    /// * 'init0': Method to initialize the centroids
    ///
    /// # Returns
    ///
    /// Instance of KMeans
    pub fn new(k: usize, n: usize, init: KMeansInitializer) -> KMeans
    {
        KMeans
        {
            k: k,
            iter: n,
            centroids: Matrix::zero(0, 0),
            initializer: init,
        }
    }

    /// Recomputes every centroid as the mean of the input rows assigned to it.
    ///
    /// Each row of `input` is assigned to its nearest centroid via
    /// `find_closest_centroids`; every centroid that received at least one row
    /// is then replaced by the mean of those rows. A centroid with no assigned
    /// rows keeps its previous position.
    ///
    /// # Arguments
    ///
    /// * `input`: Samples, one per row; the column count is expected to match
    ///   the centroid matrix.
    fn update_centroids<'a, 'b>(self: &'a mut Self, input: &'b Matrix<f64>)
    {
        let (input_m, input_n): (usize, usize) = input.dim();

        // Without rows or columns there is nothing to average; this also keeps
        // `input_n - 1` below from underflowing.
        if input_m == 0 || input_n == 0
        {
            return;
        }

        let (classes, _distances): (Vector<f64>, Vector<f64>) = self.find_closest_centroids(input);

        // Group the indices of ALL input rows by the centroid they were assigned to.
        // (`classes` holds one entry per input row, not one per centroid.)
        let mut row_indices: Vec<Vec<usize>> = vec![Vec::new(); self.k];

        for i in 0..input_m
        {
            // Class labels are stored as f64 centroid indices.
            let centroid_index: usize = *classes.get(i) as usize;

            if let Some(members) = row_indices.get_mut(centroid_index)
            {
                members.push(i);
            }
        }

        let mut new_centroids: Matrix<f64> = self.centroids.clone();

        // Calculate the new centers
        for (i, members) in row_indices.iter().enumerate()
        {
            // An empty cluster keeps its old centroid (avoids dividing by zero).
            if members.is_empty()
            {
                continue;
            }

            let mut sum: Matrix<f64> = Matrix::zero(1, input_n);

            for &row_index in members.iter()
            {
                let row: Matrix<f64> = input.get_slice(row_index, row_index, 0, input_n - 1);
                sum = &sum + &row;
            }

            let num_points: f64 = members.len() as f64;
            let mean: Matrix<f64> = sum.apply(&|x| x / num_points);
            new_centroids = new_centroids.set_slice(&mean, i, 0);
        }

        self.centroids = new_centroids;
    }

    /// Assigns every row of `input` to its nearest centroid.
    ///
    /// For each row, the dot product of the (centroid - row) difference with
    /// itself is computed for all `k` centroids and the smallest one wins.
    ///
    /// # Returns
    ///
    /// A pair `(index, distance)` with one entry per input row: the index of
    /// the winning centroid (stored as `f64`) and the corresponding value of
    /// the distance measure described above.
    fn find_closest_centroids(self: &Self, input: &Matrix<f64>) -> (Vector<f64>, Vector<f64>)
    {
        let (num_points, _num_features): (usize, usize) = input.dim();

        let mut nearest: Vector<f64> = Vector::zero(num_points);
        let mut nearest_dist: Vector<f64> = Vector::zero(num_points);

        for p in 0..num_points
        {
            let point: Vector<f64> = input.get_row(p);

            // Distance measure from this point to each of the k centroids.
            let mut candidates: Vector<f64> = Vector::zero(self.k);

            for c in 0..self.k
            {
                let delta: Vector<f64> = (&self.centroids.get_row(c) - &point).transpose();
                *candidates.get_mut(c) = delta.dotp(&delta);
            }

            // The centroid with the minimal distance is the one this point belongs to.
            let best: usize = candidates.argmin();
            *nearest_dist.get_mut(p) = *candidates.get(best);
            *nearest.get_mut(p) = best as f64;
        }

        (nearest, nearest_dist)
    }

    /// Seeds the centroid matrix from the training data.
    ///
    /// NOTE(review): despite the name, no random sampling happens here. The
    /// first `k` rows of `input` are copied, in order, into a fresh
    /// `k x columns` matrix that replaces `self.centroids`.
    fn random_init_centroids<'a, 'b>(self: &'a mut Self, input: &'b  Matrix<f64>)
    {
        let (_rows, cols): (usize, usize) = input.dim();

        let mut seeded = Matrix::zero(self.k, cols);

        // Row `r` of the input becomes centroid `r`.
        for r in 0..self.k
        {
            let sample: Matrix<f64> = input.get_slice(r, r, 0, cols - 1);
            seeded = seeded.set_slice(&sample, r, 0);
        }

        self.centroids = seeded;
    }

    /// Borrows the current centroid matrix (one centroid per row).
    ///
    /// Before `train` has been called this is the empty 0x0 matrix set by `new`.
    pub fn centroids(&self) -> &Matrix<f64>
    {
        &self.centroids
    }
}

impl UnsupervisedLearn<Matrix<f64>, Vector<f64>> for KMeans
{
    /// Fits the centroids to the given samples.
    ///
    /// The centroids are first initialized according to the configured
    /// `KMeansInitializer`, then `update_centroids` is applied a fixed number
    /// of times (`iter`); there is no convergence check.
    ///
    /// # Arguments
    ///
    /// * `input`: Training samples, one per row
    fn train<'a, 'b>(self: &'a mut Self, input: &'b Matrix<f64>)
    {
        // Pick the starting centroids.
        match self.initializer
        {
            KMeansInitializer::Random => self.random_init_centroids(input),
        }

        // Refine them for the configured number of rounds.
        let rounds: usize = self.iter;
        for _round in 0..rounds
        {
            self.update_centroids(input);
        }
    }

    /// Returns, for every row of `input`, the index of its nearest centroid.
    ///
    /// Indices are stored as `f64` values; the distances computed alongside
    /// them are discarded.
    fn predict<'a, 'b>(self: &'a Self, input: &'b Matrix<f64>) -> Vector<f64>
    {
        let (labels, _distances): (Vector<f64>, Vector<f64>) = self.find_closest_centroids(input);

        labels
    }
}